webpeel 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -5
- package/dist/cli.js +1299 -85
- package/dist/cli.js.map +1 -1
- package/dist/core/application-tracker.d.ts +85 -0
- package/dist/core/application-tracker.d.ts.map +1 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/application-tracker.js.map +1 -0
- package/dist/core/apply.d.ts +163 -0
- package/dist/core/apply.d.ts.map +1 -0
- package/dist/core/apply.js +817 -0
- package/dist/core/apply.js.map +1 -0
- package/dist/core/branding.d.ts +1 -1
- package/dist/core/branding.d.ts.map +1 -1
- package/dist/core/budget.d.ts +43 -0
- package/dist/core/budget.d.ts.map +1 -0
- package/dist/core/budget.js +325 -0
- package/dist/core/budget.js.map +1 -0
- package/dist/core/challenge-detection.d.ts +27 -0
- package/dist/core/challenge-detection.d.ts.map +1 -0
- package/dist/core/challenge-detection.js +436 -0
- package/dist/core/challenge-detection.js.map +1 -0
- package/dist/core/change-tracking.d.ts.map +1 -1
- package/dist/core/change-tracking.js +10 -1
- package/dist/core/change-tracking.js.map +1 -1
- package/dist/core/crawler.d.ts.map +1 -1
- package/dist/core/crawler.js +17 -4
- package/dist/core/crawler.js.map +1 -1
- package/dist/core/diff.d.ts +62 -0
- package/dist/core/diff.d.ts.map +1 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/diff.js.map +1 -0
- package/dist/core/extract-listings.d.ts +39 -0
- package/dist/core/extract-listings.d.ts.map +1 -0
- package/dist/core/extract-listings.js +331 -0
- package/dist/core/extract-listings.js.map +1 -0
- package/dist/core/extract.d.ts.map +1 -1
- package/dist/core/extract.js +15 -2
- package/dist/core/extract.js.map +1 -1
- package/dist/core/fetcher.d.ts +29 -3
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +158 -20
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/human.d.ts +176 -0
- package/dist/core/human.d.ts.map +1 -0
- package/dist/core/human.js +681 -0
- package/dist/core/human.js.map +1 -0
- package/dist/core/jobs.d.ts +12 -2
- package/dist/core/jobs.d.ts.map +1 -1
- package/dist/core/jobs.js +124 -2
- package/dist/core/jobs.js.map +1 -1
- package/dist/core/map.d.ts.map +1 -1
- package/dist/core/map.js +14 -2
- package/dist/core/map.js.map +1 -1
- package/dist/core/paginate.d.ts +32 -0
- package/dist/core/paginate.d.ts.map +1 -0
- package/dist/core/paginate.js +107 -0
- package/dist/core/paginate.js.map +1 -0
- package/dist/core/rate-governor.d.ts +81 -0
- package/dist/core/rate-governor.d.ts.map +1 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/rate-governor.js.map +1 -0
- package/dist/core/search-provider.d.ts +5 -0
- package/dist/core/search-provider.d.ts.map +1 -1
- package/dist/core/search-provider.js +81 -2
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/site-search.d.ts +45 -0
- package/dist/core/site-search.d.ts.map +1 -0
- package/dist/core/site-search.js +253 -0
- package/dist/core/site-search.js.map +1 -0
- package/dist/core/strategies.d.ts +8 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +185 -45
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/strategy-hooks.d.ts +6 -0
- package/dist/core/strategy-hooks.d.ts.map +1 -1
- package/dist/core/strategy-hooks.js.map +1 -1
- package/dist/core/table-format.d.ts +31 -0
- package/dist/core/table-format.d.ts.map +1 -0
- package/dist/core/table-format.js +147 -0
- package/dist/core/table-format.js.map +1 -0
- package/dist/core/user-agents.d.ts +58 -0
- package/dist/core/user-agents.d.ts.map +1 -0
- package/dist/core/user-agents.js +159 -0
- package/dist/core/user-agents.js.map +1 -0
- package/dist/core/watch.d.ts +100 -0
- package/dist/core/watch.d.ts.map +1 -0
- package/dist/core/watch.js +368 -0
- package/dist/core/watch.js.map +1 -0
- package/dist/index.d.ts +13 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +41 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +3 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +73 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +1 -1
- package/package.json +3 -3
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel Diff - Semantic content diff against stored snapshots
|
|
3
|
+
*
|
|
4
|
+
* Fetches the current content of a URL, loads the previous snapshot from the
|
|
5
|
+
* change-tracking store, computes a structured diff (field-by-field for JSON,
|
|
6
|
+
* section-by-section for text/markdown), saves the new snapshot, and returns
|
|
7
|
+
* a structured {@link DiffResult}.
|
|
8
|
+
*/
|
|
9
|
+
/** Optional settings accepted by {@link diffUrl}. */
export interface DiffOptions {
    /**
     * Diff against the most recently tracked snapshot.
     * This is already the default behaviour.
     */
    last?: boolean;
    /**
     * Restrict the comparison to these fields, given in dot-notation.
     * Applies to JSON responses only.
     */
    fields?: string[];
    /** Fetch the page using browser rendering. */
    render?: boolean;
    /** Timeout for each request, in milliseconds (default: 30 000). */
    timeout?: number;
}
|
|
19
|
+
/** Outcome of comparing the current content of a URL with its previous snapshot. */
export interface DiffResult {
    /** URL the result refers to. */
    url: string;
    /** `true` when at least one change was detected. */
    changed: boolean;
    /** When the current fetch happened, as an ISO-8601 timestamp. */
    timestamp: string;
    /** When the previous snapshot was taken (ISO-8601); undefined if there was none. */
    previousTimestamp?: string;
    /** The individual changes that were detected. */
    changes: DiffChange[];
    /** One-sentence, human-readable summary of the result. */
    summary: string;
}
|
|
31
|
+
/** A single difference between the previous snapshot and the current content. */
export interface DiffChange {
    /** Kind of difference. */
    type: 'added' | 'removed' | 'modified';
    /** JSON diffs: dot-notation path of the field that changed. */
    field?: string;
    /** Text diffs: the nearest section heading, or a line reference. */
    path?: string;
    /** Previous value or text, when there is one. */
    before?: string;
    /** Current value or text, when there is one. */
    after?: string;
}
|
|
40
|
+
/**
 * Diff the live content of a URL against its last tracked snapshot.
 *
 * Steps performed:
 * 1. The previous snapshot, if one exists, is read from the change-tracking store.
 * 2. The current content is fetched via {@link peel}.
 * 3. The current content is stored as the new snapshot (auto-tracking).
 * 4. A structured diff is computed: field-by-field for JSON,
 *    section-by-section for text/markdown.
 * 5. The outcome is returned as a {@link DiffResult}.
 *
 * @param url - URL to fetch and compare.
 * @param options - Optional {@link DiffOptions}.
 * @returns A promise resolving to the {@link DiffResult}.
 *
 * @example
 * ```typescript
 * const result = await diffUrl('https://api.example.com/health', { last: true });
 * console.log(result.summary);
 * result.changes.forEach(c => {
 *   if (c.field) console.log(`${c.type}: ${c.field} ${c.before} → ${c.after}`);
 * });
 * ```
 */
export declare function diffUrl(url: string, options?: DiffOptions): Promise<DiffResult>;
|
|
61
|
+
export { trackChange, getSnapshot } from './change-tracking.js';
|
|
62
|
+
//# sourceMappingURL=diff.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"diff.d.ts","sourceRoot":"","sources":["../../src/core/diff.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAOH,MAAM,WAAW,WAAW;IAC1B,qEAAqE;IACrE,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,wEAAwE;IACxE,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,6DAA6D;IAC7D,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,yCAAyC;IACzC,OAAO,EAAE,OAAO,CAAC;IACjB,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,GAAG,SAAS,GAAG,UAAU,CAAC;IACvC,8DAA8D;IAC9D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iEAAiE;IACjE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA0OD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CAqEzF;AAID,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel Diff - Semantic content diff against stored snapshots
|
|
3
|
+
*
|
|
4
|
+
* Fetches the current content of a URL, loads the previous snapshot from the
|
|
5
|
+
* change-tracking store, computes a structured diff (field-by-field for JSON,
|
|
6
|
+
* section-by-section for text/markdown), saves the new snapshot, and returns
|
|
7
|
+
* a structured {@link DiffResult}.
|
|
8
|
+
*/
|
|
9
|
+
import { peel } from '../index.js';
|
|
10
|
+
import { getSnapshot } from './change-tracking.js';
|
|
11
|
+
// ─── JSON diffing ──────────────────────────────────────────────────────────────
|
|
12
|
+
/**
|
|
13
|
+
* Compare two JSON values recursively, returning structured {@link DiffChange}
|
|
14
|
+
* objects. Non-object values (numbers, strings, arrays) are reported as atomic
|
|
15
|
+
* modifications.
|
|
16
|
+
*
|
|
17
|
+
* @param before - Previous JSON value
|
|
18
|
+
* @param after - Current JSON value
|
|
19
|
+
* @param path - Current dot-notation path (for recursion; start with "")
|
|
20
|
+
* @param fields - Optional allowlist of dot-notation paths to compare
|
|
21
|
+
*/
|
|
22
|
+
function diffJson(before, after, path = '', fields) {
|
|
23
|
+
const changes = [];
|
|
24
|
+
const isPlainObject = (v) => typeof v === 'object' && v !== null && !Array.isArray(v);
|
|
25
|
+
if (!isPlainObject(before) || !isPlainObject(after)) {
|
|
26
|
+
// Atomic comparison.
|
|
27
|
+
if (JSON.stringify(before) !== JSON.stringify(after)) {
|
|
28
|
+
if (!fields || fields.length === 0 || fields.some(f => f === path || path.startsWith(f + '.'))) {
|
|
29
|
+
changes.push({
|
|
30
|
+
type: 'modified',
|
|
31
|
+
field: path || '(root)',
|
|
32
|
+
before: stringify(before),
|
|
33
|
+
after: stringify(after),
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
return changes;
|
|
38
|
+
}
|
|
39
|
+
const allKeys = new Set([...Object.keys(before), ...Object.keys(after)]);
|
|
40
|
+
for (const key of allKeys) {
|
|
41
|
+
const fullPath = path ? `${path}.${key}` : key;
|
|
42
|
+
// Apply field filter when specified.
|
|
43
|
+
if (fields && fields.length > 0) {
|
|
44
|
+
const inScope = fields.some(f => fullPath === f || fullPath.startsWith(f + '.') || f.startsWith(fullPath + '.'));
|
|
45
|
+
if (!inScope)
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
48
|
+
const bVal = before[key];
|
|
49
|
+
const aVal = after[key];
|
|
50
|
+
if (bVal === undefined && aVal !== undefined) {
|
|
51
|
+
changes.push({ type: 'added', field: fullPath, after: stringify(aVal) });
|
|
52
|
+
}
|
|
53
|
+
else if (bVal !== undefined && aVal === undefined) {
|
|
54
|
+
changes.push({ type: 'removed', field: fullPath, before: stringify(bVal) });
|
|
55
|
+
}
|
|
56
|
+
else if (JSON.stringify(bVal) !== JSON.stringify(aVal)) {
|
|
57
|
+
// Recurse into nested objects.
|
|
58
|
+
if (isPlainObject(bVal) && isPlainObject(aVal)) {
|
|
59
|
+
changes.push(...diffJson(bVal, aVal, fullPath, fields));
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
changes.push({
|
|
63
|
+
type: 'modified',
|
|
64
|
+
field: fullPath,
|
|
65
|
+
before: stringify(bVal),
|
|
66
|
+
after: stringify(aVal),
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return changes;
|
|
72
|
+
}
|
|
73
|
+
/**
 * Render a JSON value as a compact display string.
 *
 * Strings are returned verbatim (no surrounding quotes); everything else goes
 * through `JSON.stringify`.
 */
function stringify(v) {
    // JSON.stringify returns undefined for inputs such as `undefined`;
    // fall back to the literal text 'undefined' in that case.
    return typeof v === 'string' ? v : (JSON.stringify(v) ?? 'undefined');
}
|
|
79
|
+
/**
 * Diff two text blobs line-by-line using an LCS (Longest Common Subsequence)
 * table, then group each run of consecutive additions/deletions into one
 * change entry.
 *
 * Each entry is labelled with the nearest preceding unchanged Markdown heading
 * (`#` to `######`), or with `line ~N` when no such heading exists, where N is
 * the position in the edit script just past the run.
 *
 * Only the first 5 000 lines of each input are compared; anything beyond that
 * limit is ignored.
 *
 * @param beforeText - Previous text
 * @param afterText  - Current text
 * @returns `changes` (one entry per changed run) and `stats` with the total
 *          number of lines added and removed.
 */
function diffText(beforeText, afterText) {
    const MAX_LINES = 5_000;
    const bLines = beforeText.split('\n').slice(0, MAX_LINES);
    const aLines = afterText.split('\n').slice(0, MAX_LINES);
    // Build the LCS table. No cell can exceed MAX_LINES, so 16-bit typed rows
    // are sufficient and far more compact than arrays of boxed numbers.
    const m = bLines.length;
    const n = aLines.length;
    const lcs = Array.from({ length: m + 1 }, () => new Uint16Array(n + 1));
    for (let i = 1; i <= m; i++) {
        for (let j = 1; j <= n; j++) {
            if (bLines[i - 1] === aLines[j - 1]) {
                lcs[i][j] = lcs[i - 1][j - 1] + 1;
            }
            else {
                lcs[i][j] = Math.max(lcs[i - 1][j], lcs[i][j - 1]);
            }
        }
    }
    // Backtrack from the bottom-right corner. Ops are appended in reverse
    // order and flipped once afterwards; prepending each one would make this
    // loop quadratic.
    const ops = [];
    let bi = m;
    let ai = n;
    while (bi > 0 || ai > 0) {
        if (bi > 0 && ai > 0 && bLines[bi - 1] === aLines[ai - 1]) {
            ops.push({ op: 'same', line: aLines[ai - 1] });
            bi--;
            ai--;
        }
        else if (ai > 0 && (bi === 0 || lcs[bi][ai - 1] >= lcs[bi - 1][ai])) {
            ops.push({ op: 'add', line: aLines[ai - 1] });
            ai--;
        }
        else {
            ops.push({ op: 'del', line: bLines[bi - 1] });
            bi--;
        }
    }
    ops.reverse();
    // Group consecutive non-same ops into sections.
    const headingPattern = /^#{1,6}\s/;
    const changes = [];
    let linesAdded = 0;
    let linesRemoved = 0;
    // Most recent heading seen among unchanged lines; avoids rescanning the
    // earlier ops for every change run.
    let lastHeading;
    let i = 0;
    while (i < ops.length) {
        const current = ops[i];
        if (current.op === 'same') {
            if (headingPattern.test(current.line)) {
                lastHeading = current.line.trim();
            }
            i++;
            continue;
        }
        // Collect the run of changes.
        const added = [];
        const removed = [];
        while (i < ops.length && ops[i].op !== 'same') {
            if (ops[i].op === 'add') {
                added.push(ops[i].line);
            }
            else {
                removed.push(ops[i].line);
            }
            i++;
        }
        linesAdded += added.length;
        linesRemoved += removed.length;
        const sectionLabel = lastHeading ?? `line ~${i}`;
        if (removed.length > 0 && added.length > 0) {
            changes.push({ type: 'modified', path: sectionLabel, before: removed.join('\n'), after: added.join('\n') });
        }
        else if (added.length > 0) {
            changes.push({ type: 'added', path: sectionLabel, after: added.join('\n') });
        }
        else if (removed.length > 0) {
            changes.push({ type: 'removed', path: sectionLabel, before: removed.join('\n') });
        }
    }
    return { changes, stats: { linesAdded, linesRemoved } };
}
|
|
163
|
+
// ─── Summary generation ────────────────────────────────────────────────────────
|
|
164
|
+
/**
 * Build the one-sentence, human-readable summary for a diff.
 *
 * @param changes - Detected changes; only the number of entries is used
 * @param mode - `'json'` for field-level diffs; any other value is treated as
 *               a text diff
 * @param jsonTotalFields - JSON mode: total number of fields, used to derive
 *                          the "unchanged" count (missing counts as 0)
 * @param textStats - Text mode: `{ linesAdded, linesRemoved }` totals
 *                    (missing counts as 0 for both)
 * @returns The summary sentence, e.g. `"2 fields changed, 5 unchanged."` or
 *          `"1 section changed, 1 line added, 3 removed."`
 */
function buildSummary(changes, mode, jsonTotalFields, textStats) {
    if (changes.length === 0)
        return 'No changes detected.';
    const plural = (count) => (count === 1 ? '' : 's');
    if (mode === 'json') {
        // Clamp at zero: there can be more change entries than counted fields.
        const unchanged = Math.max(0, (jsonTotalFields ?? 0) - changes.length);
        const parts = [`${changes.length} field${plural(changes.length)} changed`];
        if (unchanged > 0)
            parts.push(`${unchanged} unchanged`);
        return parts.join(', ') + '.';
    }
    // Text mode.
    const sections = changes.length;
    const added = textStats?.linesAdded ?? 0;
    const removed = textStats?.linesRemoved ?? 0;
    return (`${sections} section${plural(sections)} changed` +
        // Pluralize "line" the same way as "section" and "field".
        (added > 0 ? `, ${added} line${plural(added)} added` : '') +
        (removed > 0 ? `, ${removed} removed` : '') +
        '.');
}
|
|
185
|
+
// ─── Key counting helpers ──────────────────────────────────────────────────────
|
|
186
|
+
/** Count the total number of leaf-level keys (dot-notation) in two JSON objects combined. */
|
|
187
|
+
function countTotalFields(a, b) {
|
|
188
|
+
const keys = new Set();
|
|
189
|
+
collectKeys(a, '', keys);
|
|
190
|
+
collectKeys(b, '', keys);
|
|
191
|
+
return keys.size;
|
|
192
|
+
}
|
|
193
|
+
function collectKeys(obj, prefix, acc) {
|
|
194
|
+
if (typeof obj !== 'object' || obj === null || Array.isArray(obj)) {
|
|
195
|
+
if (prefix)
|
|
196
|
+
acc.add(prefix);
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
for (const key of Object.keys(obj)) {
|
|
200
|
+
const path = prefix ? `${prefix}.${key}` : key;
|
|
201
|
+
acc.add(path);
|
|
202
|
+
collectKeys(obj[key], path, acc);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
// ─── Public API ────────────────────────────────────────────────────────────────
|
|
206
|
+
/**
 * Fetch a URL and compute a semantic diff against the last tracked snapshot.
 *
 * The function:
 *  1. Loads the previous snapshot (if any) via `getSnapshot`.
 *  2. Fetches the current content via {@link peel} as markdown, passing
 *     `changeTracking: true` so the fetch records the new snapshot.
 *  3. Returns an informational, unchanged result when no baseline exists.
 *  4. Otherwise computes a structured diff: field-by-field when both the
 *     previous and the current content parse to non-null JSON, and
 *     section-by-section text diffing in every other case.
 *  5. Returns a {@link DiffResult} whose summary matches the diff that ran.
 *
 * Only `fields`, `render` and `timeout` are read from `options`.
 *
 * @param url - URL to fetch and compare
 * @param options - Optional settings (`fields`, `render`, `timeout`)
 * @returns The structured diff result
 *
 * @example
 * ```typescript
 * const result = await diffUrl('https://api.example.com/health', { last: true });
 * console.log(result.summary);
 * result.changes.forEach(c => {
 *   if (c.field) console.log(`${c.type}: ${c.field} ${c.before} → ${c.after}`);
 * });
 * ```
 */
export async function diffUrl(url, options = {}) {
    const { fields, render = false, timeout = 30_000 } = options;
    // 1. Load previous snapshot before fetching (fetch overwrites it).
    const prevSnapshot = await getSnapshot(url);
    // 2. Fetch current content. changeTracking: true auto-saves the new snapshot.
    const result = await peel(url, {
        render,
        timeout,
        format: 'markdown',
        changeTracking: true,
    });
    const now = new Date().toISOString();
    // 3. No baseline → return an informational result.
    if (!prevSnapshot) {
        return {
            url: result.url,
            changed: false,
            timestamp: now,
            changes: [],
            summary: 'No previous snapshot found. Current content saved as baseline.',
        };
    }
    const previousTimestamp = new Date(prevSnapshot.timestamp).toISOString();
    const previousContent = prevSnapshot.content;
    const currentContent = result.content;
    // 4. Detect content type and compute appropriate diff.
    let prevJson = null;
    let currJson = null;
    try {
        prevJson = JSON.parse(previousContent);
        currJson = JSON.parse(currentContent);
    }
    catch {
        /* Not JSON on at least one side — fall through to text diffing */
        prevJson = null;
        currJson = null;
    }
    // Pick the mode from the same condition that selects the diff algorithm,
    // so the summary never describes a text diff in JSON terms (this used to
    // happen when either side parsed to the JSON literal `null`).
    const mode = prevJson !== null && currJson !== null ? 'json' : 'text';
    let changes = [];
    let jsonTotalFields = 0;
    let textStats;
    if (mode === 'json') {
        changes = diffJson(prevJson, currJson, '', fields);
        jsonTotalFields = countTotalFields(prevJson, currJson);
    }
    else {
        const { changes: textChanges, stats } = diffText(previousContent, currentContent);
        changes = textChanges;
        textStats = stats;
    }
    // 5. `changed` is derived solely from the computed diff.
    const changed = changes.length > 0;
    return {
        url: result.url,
        changed,
        timestamp: now,
        previousTimestamp,
        changes,
        summary: buildSummary(changes, mode, jsonTotalFields, textStats),
    };
}
|
|
287
|
+
// ─── Re-export trackChange for CLI convenience ─────────────────────────────────
|
|
288
|
+
export { trackChange, getSnapshot } from './change-tracking.js';
|
|
289
|
+
//# sourceMappingURL=diff.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"diff.js","sourceRoot":"","sources":["../../src/core/diff.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AACnC,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAsCnD,kFAAkF;AAElF;;;;;;;;;GASG;AACH,SAAS,QAAQ,CACf,MAAe,EACf,KAAc,EACd,OAAe,EAAE,EACjB,MAAiB;IAEjB,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,MAAM,aAAa,GAAG,CAAC,CAAU,EAAgC,EAAE,CACjE,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE3D,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,EAAE,CAAC;QACpD,qBAAqB;QACrB,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;YACrD,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC;gBAC/F,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,UAAU;oBAChB,KAAK,EAAE,IAAI,IAAI,QAAQ;oBACvB,MAAM,EAAE,SAAS,CAAC,MAAM,CAAC;oBACzB,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC;iBACxB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEzE,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAE/C,qCAAqC;QACrC,IAAI,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAC9B,QAAQ,KAAK,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAC/E,CAAC;YACF,IAAI,CAAC,OAAO;gBAAE,SAAS;QACzB,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QACzB,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;QAExB,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC3E,CAAC;aAAM,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACpD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,C
AAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9E,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;YACzD,+BAA+B;YAC/B,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,UAAU;oBAChB,KAAK,EAAE,QAAQ;oBACf,MAAM,EAAE,SAAS,CAAC,IAAI,CAAC;oBACvB,KAAK,EAAE,SAAS,CAAC,IAAI,CAAC;iBACvB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,oDAAoD;AACpD,SAAS,SAAS,CAAC,CAAU;IAC3B,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAC;IACpC,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC;AAC1C,CAAC;AASD;;;;GAIG;AACH,SAAS,QAAQ,CAAC,UAAkB,EAAE,SAAiB;IACrD,MAAM,SAAS,GAAG,KAAK,CAAC;IACxB,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAEzD,mBAAmB;IACnB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,GAAG,GAAe,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAa,CAAC,CAAC;IAElG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;gBACpC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAID,MAAM,GAAG,GAAS,EAAE,CAAC;IACrB,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,IAAI,EAAE,GAAG,CAAC,CAAC;IAEX,OAAO,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE,CAAC;QACxB,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,C
AAC,EAAE,GAAG,CAAC,CAAC,KAAK,MAAM,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1D,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAE,EAAE,CAAC,CAAC;YACnD,EAAE,EAAE,CAAC;YAAC,EAAE,EAAE,CAAC;QACb,CAAC;aAAM,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,CAAE,CAAC,EAAE,GAAG,CAAC,CAAE,IAAI,GAAG,CAAC,EAAE,GAAG,CAAC,CAAE,CAAC,EAAE,CAAE,CAAC,EAAE,CAAC;YAC1E,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAE,EAAE,CAAC,CAAC;YAClD,EAAE,EAAE,CAAC;QACP,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,CAAE,EAAE,CAAC,CAAC;YAClD,EAAE,EAAE,CAAC;QACP,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,MAAM,OAAO,GAAiB,EAAE,CAAC;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,IAAI,GAAG,CAAC,CAAC,CAAE,CAAC,EAAE,KAAK,MAAM,EAAE,CAAC;YAAC,CAAC,EAAE,CAAC;YAAC,SAAS;QAAC,CAAC;QAE7C,8BAA8B;QAC9B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAE,CAAC,EAAE,KAAK,MAAM,EAAE,CAAC;YAC/C,IAAI,GAAG,CAAC,CAAC,CAAE,CAAC,EAAE,KAAK,KAAK;gBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAC;YACnD,IAAI,GAAG,CAAC,CAAC,CAAE,CAAC,EAAE,KAAK,KAAK;gBAAE,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAC;YACrD,CAAC,EAAE,CAAC;QACN,CAAC;QAED,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC;QAC3B,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;QAE/B,uEAAuE;QACvE,IAAI,YAAY,GAAG,SAAS,CAAC,EAAE,CAAC;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChE,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,MAAM,IAAI,MAAM,CAAC,EAAE,KAAK,MAAM,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpE,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBAClC,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU
,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9G,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/E,CAAC;aAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,UAAU,EAAE,YAAY,EAAE,EAAE,CAAC;AAC1D,CAAC;AAED,kFAAkF;AAElF,SAAS,YAAY,CACnB,OAAqB,EACrB,IAAqB,EACrB,eAAwB,EACxB,SAAyB;IAEzB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,sBAAsB,CAAC;IAExD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;QACvE,MAAM,KAAK,GAAa;YACtB,GAAG,OAAO,CAAC,MAAM,SAAS,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,UAAU;SACpE,CAAC;QACF,IAAI,SAAS,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,YAAY,CAAC,CAAC;QACxD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;IAChC,CAAC;IAED,aAAa;IACb,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,MAAM,KAAK,GAAG,SAAS,EAAE,UAAU,IAAI,CAAC,CAAC;IACzC,MAAM,OAAO,GAAG,SAAS,EAAE,YAAY,IAAI,CAAC,CAAC;IAC7C,OAAO,CACL,GAAG,QAAQ,WAAW,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,UAAU;QACzD,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3C,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3C,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,kFAAkF;AAElF,6FAA6F;AAC7F,SAAS,gBAAgB,CAAC,CAAU,EAAE,CAAU;IAC9C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,WAAW,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;IACzB,WAAW,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,CAAC;IACzB,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC;AAED,SAAS,WAAW,CAAC,GAAY,EAAE,MAAc,EAAE,GAAgB;IACjE,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QAClE,IAAI,MAAM;YAAE,GAAG,CAAC,
GAAG,CAAC,MAAM,CAAC,CAAC;QAC5B,OAAO;IACT,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,GAA8B,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC/C,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACd,WAAW,CAAE,GAA+B,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;IAChE,CAAC;AACH,CAAC;AAED,kFAAkF;AAElF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,GAAW,EAAE,UAAuB,EAAE;IAClE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,KAAK,EAAE,OAAO,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC;IAE7D,mEAAmE;IACnE,MAAM,YAAY,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IAE5C,+EAA+E;IAC/E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE;QAC7B,MAAM;QACN,OAAO;QACP,MAAM,EAAE,UAAU;QAClB,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAErC,mDAAmD;IACnD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,OAAO,EAAE,KAAK;YACd,SAAS,EAAE,GAAG;YACd,OAAO,EAAE,EAAE;YACX,OAAO,EAAE,gEAAgE;SAC1E,CAAC;IACJ,CAAC;IAED,MAAM,iBAAiB,GAAG,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;IACzE,MAAM,eAAe,GAAG,YAAY,CAAC,OAAO,CAAC;IAC7C,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC;IAEtC,uDAAuD;IACvD,IAAI,IAAI,GAAoB,MAAM,CAAC;IACnC,IAAI,OAAO,GAAiB,EAAE,CAAC;IAC/B,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,SAAoC,CAAC;IAEzC,IAAI,QAAQ,GAAY,IAAI,CAAC;IAC7B,IAAI,QAAQ,GAAY,IAAI,CAAC;IAE7B,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QACvC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QACtC,IAAI,GAAG,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;IAC/C,CAAC;IAED,IAAI,IAAI,KAAK,MAAM,IAAI,QAAQ,KAAK,IAAI,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QAC9D,OAAO,GAAG,QAAQ,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;QACnD,eAAe,GAAG,gBAAgB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACzD,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QAClF,OAAO,GAAG,WAAW,CAAC;QACtB,SAAS,GAAG,KAAK,CAAC;IACpB,CAAC;IAED,mFAAmF;IACnF,2EAA2E;IAC3E,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IAEnC,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,GAAG;QACf,OAAO;QACP,SAA
S,EAAE,GAAG;QACd,iBAAiB;QACjB,OAAO;QACP,OAAO,EAAE,YAAY,CAAC,OAAO,EAAE,IAAI,EAAE,eAAe,EAAE,SAAS,CAAC;KACjE,CAAC;AACJ,CAAC;AAED,kFAAkF;AAElF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-extract repeated listing patterns from HTML pages.
|
|
3
|
+
*
|
|
4
|
+
* Given raw HTML (e.g. an eBay search results page), this module detects the
|
|
5
|
+
* largest group of sibling elements with a consistent internal structure and
|
|
6
|
+
* extracts structured fields (title, price, image, link, description, rating)
|
|
7
|
+
* from each item.
|
|
8
|
+
*
|
|
9
|
+
* @module extract-listings
|
|
10
|
+
*/
|
|
11
|
+
/**
 * One item extracted from a repeated listing pattern.
 *
 * Every field is optional. Besides the named fields, an item may carry
 * additional string-valued keys through the index signature.
 */
export interface ListingItem {
    title?: string;
    price?: string;
    image?: string;
    link?: string;
    description?: string;
    rating?: string;
    /** Any further fields; values are strings (or undefined when absent). */
    [key: string]: string | undefined;
}
|
|
21
|
+
/**
 * Detect repeated listing patterns in raw HTML and return them as
 * structured items.
 *
 * @param html - The raw HTML string to parse.
 * @param url - Optional base URL used to resolve relative links and images.
 * @returns The extracted listing items; the array may be empty.
 *
 * @example
 * ```typescript
 * import { extractListings } from 'webpeel';
 *
 * const items = extractListings(ebayHtml, 'https://ebay.com/sch?q=card');
 * console.log(items[0].title); // "Charizard VMAX 020/189"
 * console.log(items[0].price); // "$24.99"
 * ```
 */
export declare function extractListings(html: string, url?: string): ListingItem[];
|
|
39
|
+
//# sourceMappingURL=extract-listings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-listings.d.ts","sourceRoot":"","sources":["../../src/core/extract-listings.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AASH,uCAAuC;AACvC,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;CACnC;AAyTD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,WAAW,EAAE,CAgBzE"}
|