@olib-ai/owl-browser-sdk 2.0.5 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/extraction/content-cleaner.d.ts +40 -0
- package/dist/extraction/content-cleaner.d.ts.map +1 -0
- package/dist/extraction/content-cleaner.js +393 -0
- package/dist/extraction/content-cleaner.js.map +1 -0
- package/dist/extraction/extractor.d.ts +139 -0
- package/dist/extraction/extractor.d.ts.map +1 -0
- package/dist/extraction/extractor.js +212 -0
- package/dist/extraction/extractor.js.map +1 -0
- package/dist/extraction/html-processor.d.ts +75 -0
- package/dist/extraction/html-processor.d.ts.map +1 -0
- package/dist/extraction/html-processor.js +192 -0
- package/dist/extraction/html-processor.js.map +1 -0
- package/dist/extraction/index.d.ts +14 -0
- package/dist/extraction/index.d.ts.map +1 -0
- package/dist/extraction/index.js +19 -0
- package/dist/extraction/index.js.map +1 -0
- package/dist/extraction/list-extractor.d.ts +24 -0
- package/dist/extraction/list-extractor.d.ts.map +1 -0
- package/dist/extraction/list-extractor.js +303 -0
- package/dist/extraction/list-extractor.js.map +1 -0
- package/dist/extraction/meta-extractor.d.ts +40 -0
- package/dist/extraction/meta-extractor.d.ts.map +1 -0
- package/dist/extraction/meta-extractor.js +216 -0
- package/dist/extraction/meta-extractor.js.map +1 -0
- package/dist/extraction/pagination.d.ts +29 -0
- package/dist/extraction/pagination.d.ts.map +1 -0
- package/dist/extraction/pagination.js +323 -0
- package/dist/extraction/pagination.js.map +1 -0
- package/dist/extraction/pattern-detector.d.ts +16 -0
- package/dist/extraction/pattern-detector.d.ts.map +1 -0
- package/dist/extraction/pattern-detector.js +390 -0
- package/dist/extraction/pattern-detector.js.map +1 -0
- package/dist/extraction/scrape-session.d.ts +23 -0
- package/dist/extraction/scrape-session.d.ts.map +1 -0
- package/dist/extraction/scrape-session.js +192 -0
- package/dist/extraction/scrape-session.js.map +1 -0
- package/dist/extraction/selector-engine.d.ts +23 -0
- package/dist/extraction/selector-engine.d.ts.map +1 -0
- package/dist/extraction/selector-engine.js +127 -0
- package/dist/extraction/selector-engine.js.map +1 -0
- package/dist/extraction/table-extractor.d.ts +29 -0
- package/dist/extraction/table-extractor.d.ts.map +1 -0
- package/dist/extraction/table-extractor.js +282 -0
- package/dist/extraction/table-extractor.js.map +1 -0
- package/dist/extraction/transforms.d.ts +47 -0
- package/dist/extraction/transforms.d.ts.map +1 -0
- package/dist/extraction/transforms.js +277 -0
- package/dist/extraction/transforms.js.map +1 -0
- package/dist/extraction/types.d.ts +199 -0
- package/dist/extraction/types.d.ts.map +1 -0
- package/dist/extraction/types.js +5 -0
- package/dist/extraction/types.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/playwright/browser-type.d.ts +101 -0
- package/dist/playwright/browser-type.d.ts.map +1 -0
- package/dist/playwright/browser-type.js +134 -0
- package/dist/playwright/browser-type.js.map +1 -0
- package/dist/playwright/browser.d.ts +98 -0
- package/dist/playwright/browser.d.ts.map +1 -0
- package/dist/playwright/browser.js +229 -0
- package/dist/playwright/browser.js.map +1 -0
- package/dist/playwright/context.d.ts +217 -0
- package/dist/playwright/context.d.ts.map +1 -0
- package/dist/playwright/context.js +518 -0
- package/dist/playwright/context.js.map +1 -0
- package/dist/playwright/extractor.d.ts +108 -0
- package/dist/playwright/extractor.d.ts.map +1 -0
- package/dist/playwright/extractor.js +404 -0
- package/dist/playwright/extractor.js.map +1 -0
- package/dist/playwright/frame.d.ts +147 -0
- package/dist/playwright/frame.d.ts.map +1 -0
- package/dist/playwright/frame.js +492 -0
- package/dist/playwright/frame.js.map +1 -0
- package/dist/playwright/index.d.ts +163 -0
- package/dist/playwright/index.d.ts.map +1 -0
- package/dist/playwright/index.js +313 -0
- package/dist/playwright/index.js.map +1 -0
- package/dist/playwright/keyboard.d.ts +74 -0
- package/dist/playwright/keyboard.d.ts.map +1 -0
- package/dist/playwright/keyboard.js +187 -0
- package/dist/playwright/keyboard.js.map +1 -0
- package/dist/playwright/locator.d.ts +237 -0
- package/dist/playwright/locator.d.ts.map +1 -0
- package/dist/playwright/locator.js +667 -0
- package/dist/playwright/locator.js.map +1 -0
- package/dist/playwright/mouse.d.ts +82 -0
- package/dist/playwright/mouse.d.ts.map +1 -0
- package/dist/playwright/mouse.js +137 -0
- package/dist/playwright/mouse.js.map +1 -0
- package/dist/playwright/page-helpers.d.ts +267 -0
- package/dist/playwright/page-helpers.d.ts.map +1 -0
- package/dist/playwright/page-helpers.js +449 -0
- package/dist/playwright/page-helpers.js.map +1 -0
- package/dist/playwright/page.d.ts +605 -0
- package/dist/playwright/page.d.ts.map +1 -0
- package/dist/playwright/page.js +1698 -0
- package/dist/playwright/page.js.map +1 -0
- package/dist/playwright/response.d.ts +100 -0
- package/dist/playwright/response.d.ts.map +1 -0
- package/dist/playwright/response.js +194 -0
- package/dist/playwright/response.js.map +1 -0
- package/dist/playwright/types.d.ts +354 -0
- package/dist/playwright/types.d.ts.map +1 -0
- package/dist/playwright/types.js +8 -0
- package/dist/playwright/types.js.map +1 -0
- package/openapi.json +327 -35
- package/package.json +10 -1
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Universal extraction module for Owl Browser Node.js SDK.
|
|
3
|
+
*
|
|
4
|
+
* Provides structured data extraction from any website using CSS selectors,
|
|
5
|
+
* pattern detection, table parsing, metadata extraction, and multi-page scraping.
|
|
6
|
+
*/
|
|
7
|
+
// Main entry point
|
|
8
|
+
export { Extractor } from './extractor.js';
|
|
9
|
+
// Transforms (pure functions, usable standalone)
|
|
10
|
+
export { applyTransform, applyTransforms, applyPattern, coerceType, parsePrice, parseDate, resolveUrl, } from './transforms.js';
|
|
11
|
+
// Selector engine (pure functions, usable standalone)
|
|
12
|
+
export { extractAll, extractFirst, count, } from './selector-engine.js';
|
|
13
|
+
// Table extractor (pure functions, usable standalone)
|
|
14
|
+
export { extractTable, extractGrid, extractDefinitionList, detectTables, } from './table-extractor.js';
|
|
15
|
+
// Meta extractor (pure functions, usable standalone)
|
|
16
|
+
export { extractMeta, extractJsonLd, extractOpenGraph, extractTwitterCard, extractMicrodata, extractFeeds, extractCanonical, extractFavicon, } from './meta-extractor.js';
|
|
17
|
+
// Pattern detector (pure functions, usable standalone)
|
|
18
|
+
export { detect, detectAndExtract, } from './pattern-detector.js';
|
|
19
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/extraction/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,mBAAmB;AACnB,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAuB3C,iDAAiD;AACjD,OAAO,EACL,cAAc,EACd,eAAe,EACf,YAAY,EACZ,UAAU,EACV,UAAU,EACV,SAAS,EACT,UAAU,GACX,MAAM,iBAAiB,CAAC;AAEzB,sDAAsD;AACtD,OAAO,EACL,UAAU,EACV,YAAY,EACZ,KAAK,GACN,MAAM,sBAAsB,CAAC;AAE9B,sDAAsD;AACtD,OAAO,EACL,YAAY,EACZ,WAAW,EACX,qBAAqB,EACrB,YAAY,GACb,MAAM,sBAAsB,CAAC;AAE9B,qDAAqD;AACrD,OAAO,EACL,WAAW,EACX,aAAa,EACb,gBAAgB,EAChB,kBAAkB,EAClB,gBAAgB,EAChB,YAAY,EACZ,gBAAgB,EAChB,cAAc,GACf,MAAM,qBAAqB,CAAC;AAE7B,uDAAuD;AACvD,OAAO,EACL,MAAM,EACN,gBAAgB,GACjB,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract from lists, cards, grids — any repeating DOM pattern with a known container.
|
|
3
|
+
*
|
|
4
|
+
* Provides auto-field detection: infers field names from child element semantics
|
|
5
|
+
* (headings → title, img → image, a → link, price-like → price, etc.).
|
|
6
|
+
*/
|
|
7
|
+
import type { ExtractedRecord, FieldSpec, ListOptions } from './types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Extract items from a container with optional auto-field detection.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Extract one record per repeating item found inside a container element.
 *
 * Only the first element matching `containerSelector` is inspected when the
 * item selector has to be detected automatically.
 *
 * @param html - HTML markup to parse.
 * @param containerSelector - CSS selector locating the container element.
 * @param options - Optional settings read by the implementation:
 *   `itemSelector` (skips item auto-detection), `fields` (skips field
 *   inference) and `limit` (keeps only the first `limit` records when truthy).
 * @returns The extracted records; an empty array when the container is not
 *   found or no item selector can be determined.
 */
export declare function extract(html: string, containerSelector: string, options?: ListOptions): ExtractedRecord[];
|
|
12
|
+
/**
|
|
13
|
+
* Analyze child elements to infer field names and selectors.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Infer a field-name → field-spec map from the first item of a container.
 *
 * @param html - HTML markup to parse.
 * @param containerSelector - CSS selector locating the container element
 *   (the first match is used).
 * @param itemSelector - Selector for the repeating item, relative to the
 *   container; auto-detected from the container's children when omitted.
 * @returns The inferred field specs; an empty object when the container, the
 *   item selector, or a first item cannot be found.
 */
export declare function analyzeStructure(html: string, containerSelector: string, itemSelector?: string): Record<string, FieldSpec>;
|
|
16
|
+
/**
|
|
17
|
+
* Extract a simple <ul>/<ol> list as string[].
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Collect the trimmed text of every `<li>` inside the first matching list.
 *
 * @param html - HTML markup to parse.
 * @param selector - CSS selector for the list element; the implementation
 *   falls back to `'ul, ol'` when omitted.
 * @returns The non-empty item texts in document order; an empty array when no
 *   list matches.
 */
export declare function extractList(html: string, selector?: string): string[];
|
|
20
|
+
/**
|
|
21
|
+
* Extract card-like repeating structures.
|
|
22
|
+
*/
|
|
23
|
+
/**
 * Extract card-like repeating structures; delegates to `extract` with
 * `cardSelector` passed as the `itemSelector` option.
 *
 * @param html - HTML markup to parse.
 * @param containerSelector - CSS selector locating the container element.
 * @param cardSelector - Selector for a single card; auto-detected when omitted.
 * @returns One record per card (same result shape as `extract`).
 */
export declare function extractCards(html: string, containerSelector: string, cardSelector?: string): ExtractedRecord[];
|
|
24
|
+
//# sourceMappingURL=list-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list-extractor.d.ts","sourceRoot":"","sources":["../../src/extraction/list-extractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAG1E;;GAEG;AACH,wBAAgB,OAAO,CACrB,IAAI,EAAE,MAAM,EACZ,iBAAiB,EAAE,MAAM,EACzB,OAAO,CAAC,EAAE,WAAW,GACpB,eAAe,EAAE,CAiBnB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,MAAM,EACZ,iBAAiB,EAAE,MAAM,EACzB,YAAY,CAAC,EAAE,MAAM,GACpB,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAY3B;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAWrE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,MAAM,EACZ,iBAAiB,EAAE,MAAM,EACzB,YAAY,CAAC,EAAE,MAAM,GACpB,eAAe,EAAE,CAEnB"}
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract from lists, cards, grids — any repeating DOM pattern with a known container.
|
|
3
|
+
*
|
|
4
|
+
* Provides auto-field detection: infers field names from child element semantics
|
|
5
|
+
* (headings → title, img → image, a → link, price-like → price, etc.).
|
|
6
|
+
*/
|
|
7
|
+
import { load } from 'cheerio';
|
|
8
|
+
import { extractAll } from './selector-engine.js';
|
|
9
|
+
/**
 * Pull one record per repeating item out of a container element.
 *
 * The item selector and the field map are taken from `options` when supplied;
 * otherwise they are derived from the first element matching
 * `containerSelector`.
 *
 * @param {string} html - HTML markup to parse.
 * @param {string} containerSelector - CSS selector locating the container.
 * @param {object} [options] - Optional `itemSelector`, `fields` and `limit`.
 *   A falsy `limit` (including 0) leaves the result untruncated.
 * @returns {Array} Extracted records, or an empty array when the container is
 *   missing or no item selector could be determined.
 */
export function extract(html, containerSelector, options) {
    const doc = load(html);
    const root = doc(containerSelector).first();
    if (root.length === 0) {
        return [];
    }
    const perItem = options?.itemSelector ?? detectItemSelector(doc, root);
    if (!perItem) {
        return [];
    }
    const fieldSpecs = options?.fields ?? analyzeStructure(html, containerSelector, perItem);
    // Items are addressed as descendants of the container selector.
    const records = extractAll(html, `${containerSelector} ${perItem}`, fieldSpecs);
    return options?.limit ? records.slice(0, options.limit) : records;
}
|
|
27
|
+
/**
 * Derive a field-name → field-spec map by inspecting the first item found in
 * the first element matching `containerSelector`.
 *
 * @param {string} html - HTML markup to parse.
 * @param {string} containerSelector - CSS selector locating the container.
 * @param {string} [itemSelector] - Selector for the repeating item, relative
 *   to the container; detected from the container's children when omitted.
 * @returns {object} Inferred field specs, or `{}` when the container, the item
 *   selector, or a first item cannot be found.
 */
export function analyzeStructure(html, containerSelector, itemSelector) {
    const doc = load(html);
    const root = doc(containerSelector).first();
    if (root.length === 0) {
        return {};
    }
    const resolvedSelector = itemSelector ?? detectItemSelector(doc, root);
    if (!resolvedSelector) {
        return {};
    }
    const sample = root.find(resolvedSelector).first();
    return sample.length === 0 ? {} : inferFields(doc, sample);
}
|
|
43
|
+
/**
 * Return the text of every `<li>` under the first matching list element.
 *
 * @param {string} html - HTML markup to parse.
 * @param {string} [selector] - CSS selector for the list; defaults to
 *   `'ul, ol'`.
 * @returns {string[]} Trimmed, non-empty item texts in document order, or an
 *   empty array when no list matches.
 */
export function extractList(html, selector) {
    const doc = load(html);
    const listRoot = doc(selector ?? 'ul, ol').first();
    if (listRoot.length === 0) {
        return [];
    }
    const collected = [];
    listRoot.find('li').each((_index, node) => {
        const label = doc(node).text().trim();
        // Whitespace-only items are dropped.
        if (label.length > 0) {
            collected.push(label);
        }
    });
    return collected;
}
|
|
59
|
+
/**
 * Extract card-like repeating structures by delegating to `extract`.
 *
 * @param {string} html - HTML markup to parse.
 * @param {string} containerSelector - CSS selector locating the container.
 * @param {string} [cardSelector] - Selector for one card; forwarded as the
 *   `itemSelector` option (auto-detected by `extract` when undefined).
 * @returns {Array} One record per card.
 */
export function extractCards(html, containerSelector, cardSelector) {
    const cardOptions = { itemSelector: cardSelector };
    return extract(html, containerSelector, cardOptions);
}
|
|
65
|
+
// ==================== Internal ====================
|
|
66
|
+
/**
 * Escape a raw identifier (in this file: a class token) so it can be embedded
 * in a CSS selector such as `.${cssEsc(cls)}`.
 *
 * Besides backslash-escaping CSS punctuation, this handles the identifier
 * start rules: an identifier may not begin with a digit, nor with `-`
 * followed by a digit, and may not be a lone `-`. Without this, a class such
 * as `2col` would yield the invalid selector `.2col`.
 *
 * @param {string} value - Raw identifier text (no whitespace expected).
 * @returns {string} The escaped identifier.
 */
function cssEsc(value) {
    const escaped = value.replace(/([!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~])/g, '\\$1');
    if (escaped === '-') {
        return '\\-';
    }
    // A leading digit (optionally after one "-") must be written as a
    // code-point escape: backslash, hex code point, then a terminating space.
    return escaped.replace(/^(-?)(\d)/, (_match, dash, digit) => `${dash}\\${digit.charCodeAt(0).toString(16)} `);
}
|
|
69
|
+
/**
 * Detect the most likely repeating-item selector among a container's direct
 * children.
 *
 * Children are grouped by tag plus first class token; the most frequent group
 * with at least two members wins. When no group repeats, two or more direct
 * `<li>` or `<tr>` children are accepted as a fallback.
 *
 * @param $ - Cheerio root function, used as `$(el)` to read attributes.
 * @param container - Cheerio selection holding the container element.
 * @returns {string|null} A child selector relative to the container (always
 *   prefixed with `> `), or `null` when nothing repeats.
 */
function detectItemSelector($, container) {
    const children = container.children();
    if (children.length === 0) {
        return null;
    }
    // Count tag+class combos among direct children.
    const comboCounts = new Map();
    children.each((_i, el) => {
        if (el.type !== 'tag') {
            return;
        }
        const tag = el.tagName.toLowerCase();
        // Trim first: a class attribute with leading whitespace would
        // otherwise split into an empty first token and lose the class.
        const firstClass = ($(el).attr('class') ?? '').trim().split(/\s+/)[0];
        const key = firstClass ? `${tag}.${cssEsc(firstClass)}` : tag;
        comboCounts.set(key, (comboCounts.get(key) ?? 0) + 1);
    });
    // Pick the most common combo with at least 2 occurrences (first one wins ties).
    let bestSelector = null;
    let bestCount = 1;
    for (const [selector, count] of comboCounts) {
        if (count > bestCount) {
            bestCount = count;
            bestSelector = selector;
        }
    }
    if (bestSelector) {
        return `> ${bestSelector}`;
    }
    // Fallback for lists/tables whose rows carry differing classes. The
    // result is a child selector, like the path above, so nested lists or
    // tables are not swept in as additional items.
    for (const tag of ['li', 'tr']) {
        if (container.find(`> ${tag}`).length >= 2) {
            return `> ${tag}`;
        }
    }
    return null;
}
|
|
106
|
+
/**
 * Build a class selector for an element that a heuristic already matched.
 *
 * @param el - Cheerio selection wrapping the matched element.
 * @param {string[]} keywords - Substrings the heuristic matched on. When
 *   non-empty, the class token containing one of them is used (so the result
 *   targets the class that triggered the match, not an unrelated utility
 *   class). When empty, the first class token is used.
 * @param {string} fallback - Selector returned when no suitable token exists.
 * @returns {string} `.token` (escaped) or `fallback`.
 */
function classSelectorFor(el, keywords, fallback) {
    const tokens = (el.attr('class') ?? '').split(/\s+/).filter(Boolean);
    const chosen = keywords.length > 0
        ? tokens.find((token) => keywords.some((keyword) => token.includes(keyword)))
        : tokens[0];
    return chosen ? `.${cssEsc(chosen)}` : fallback;
}
/**
 * Infer field names from the structure of a representative item element.
 *
 * Detection order: the item's own tag (`<a>` / heading), `itemprop`
 * microdata, then heuristics for title, image, link, price, date, rating,
 * badge and description. A field name is assigned at most once; earlier
 * detections win.
 *
 * @param $ - Cheerio root function, used as `$(el)`.
 * @param item - Cheerio selection holding the representative item.
 * @returns {object} Map of field name to field spec. Specs are either a
 *   string (`''` meaning the item's own text, a selector, or a
 *   `selector@attribute` form — presumably interpreted by the selector
 *   engine; confirm there) or a `{ selector, attribute }` object.
 */
function inferFields($, item) {
    const fields = {};
    const usedNames = new Set();
    const itemTag = item[0]?.tagName?.toLowerCase();
    // If the item itself is an <a>, treat it as a link with title.
    if (itemTag === 'a') {
        fields['link'] = '@href';
        fields['title'] = ''; // empty selector = the item's own text
        usedNames.add('link');
        usedNames.add('title');
    }
    // If the item itself is a heading, treat it as a title.
    if (!usedNames.has('title') && itemTag && /^h[1-6]$/.test(itemTag)) {
        fields['title'] = '';
        usedNames.add('title');
    }
    // --- Microdata / schema.org detection (itemprop attributes) ---
    const microdataMap = {
        'name': 'title',
        'headline': 'title',
        'price': 'price',
        'description': 'description',
        'image': 'image',
        'url': 'link',
        'datePublished': 'date',
        'dateCreated': 'date',
    };
    for (const [prop, fieldName] of Object.entries(microdataMap)) {
        if (usedNames.has(fieldName)) {
            continue;
        }
        const el = item.find(`[itemprop="${prop}"]`).first();
        if (el.length === 0) {
            continue;
        }
        const tag = el[0]?.tagName?.toLowerCase() ?? '';
        if (fieldName === 'image' && tag === 'img') {
            fields[fieldName] = `[itemprop="${prop}"]@src`;
        }
        else if (fieldName === 'link' && tag === 'a') {
            fields[fieldName] = `[itemprop="${prop}"]@href`;
        }
        else if (fieldName === 'date' && tag === 'time') {
            fields[fieldName] = { selector: `[itemprop="${prop}"]`, attribute: 'datetime' };
        }
        else if (fieldName === 'price' && el.attr('content')) {
            // Prefer the machine-readable content attribute when present.
            fields[fieldName] = { selector: `[itemprop="${prop}"]`, attribute: 'content' };
        }
        else {
            fields[fieldName] = `[itemprop="${prop}"]`;
        }
        usedNames.add(fieldName);
    }
    // --- Standard field detection ---
    // Headings → title
    if (!usedNames.has('title')) {
        const heading = item.find('h1, h2, h3, h4, h5, h6').first();
        if (heading.length > 0) {
            fields['title'] = heading[0]?.tagName?.toLowerCase() ?? 'h2';
            usedNames.add('title');
        }
    }
    // Images → image
    if (!usedNames.has('image') && item.find('img').first().length > 0) {
        fields['image'] = 'img@src';
        usedNames.add('image');
    }
    // Links → link (and title, when nothing better was found and the link has text)
    if (!usedNames.has('link')) {
        const link = item.find('a').first();
        if (link.length > 0) {
            fields['link'] = 'a@href';
            if (!usedNames.has('title') && link.text().trim()) {
                fields['title'] = 'a';
                usedNames.add('title');
            }
            usedNames.add('link');
        }
    }
    // Price-like content — first by class / data attribute, then by text pattern
    if (!usedNames.has('price')) {
        const priceEl = item.find('[class*="price"], [data-price]').first();
        if (priceEl.length > 0) {
            // An element without a "price" class token can only have matched
            // through its data-price attribute, so that is the fallback.
            fields['price'] = classSelectorFor(priceEl, ['price'], '[data-price]');
            usedNames.add('price');
        }
        else {
            // Text-based price detection: currency symbol next to digits.
            const priceRegex = /[$€£¥₹₩]\s*[\d,.]+|[\d,.]+\s*[$€£¥₹₩]/;
            item.find('*').each((_i, el) => {
                if (el.type !== 'tag') {
                    return undefined;
                }
                const $el = $(el);
                // Only consider small elements: at most two element children.
                if ($el.children().length > 2) {
                    return undefined;
                }
                const text = $el.text().trim();
                if (text.length >= 30 || !priceRegex.test(text)) {
                    return undefined;
                }
                fields['price'] = classSelectorFor($el, [], el.tagName.toLowerCase());
                usedNames.add('price');
                return false; // first hit wins — stop iterating
            });
        }
    }
    // Time/date elements
    if (!usedNames.has('date')) {
        const timeEl = item.find('time, [class*="date"], [class*="time"]').first();
        if (timeEl.length > 0) {
            if (timeEl[0]?.tagName?.toLowerCase() === 'time') {
                fields['date'] = { selector: 'time', attribute: 'datetime' };
            }
            else {
                fields['date'] = classSelectorFor(timeEl, ['date', 'time'], '[class*="date"]');
            }
            usedNames.add('date');
        }
    }
    // Rating/star content
    if (!usedNames.has('rating')) {
        const ratingEl = item.find('[class*="rating"], [class*="star"], [data-rating], [aria-label*="rating" i], [aria-label*="star" i]').first();
        if (ratingEl.length > 0) {
            const ariaLabel = ratingEl.attr('aria-label');
            if (ratingEl.attr('data-rating')) {
                fields['rating'] = { selector: '[data-rating]', attribute: 'data-rating' };
            }
            else if (ariaLabel) {
                // Target the rating element itself. A bare [aria-label]
                // selector would pick up the first labelled element of any
                // kind (e.g. an "Add to cart" button).
                let selector;
                if (/rating/i.test(ariaLabel)) {
                    selector = '[aria-label*="rating" i]';
                }
                else if (/star/i.test(ariaLabel)) {
                    selector = '[aria-label*="star" i]';
                }
                else {
                    selector = classSelectorFor(ratingEl, ['rating', 'star'], '[data-rating]');
                }
                fields['rating'] = { selector, attribute: 'aria-label' };
            }
            else {
                fields['rating'] = classSelectorFor(ratingEl, ['rating', 'star'], '[data-rating]');
            }
            usedNames.add('rating');
        }
    }
    // Badges/status/tags
    if (!usedNames.has('badge')) {
        const badgeEl = item.find('[class*="badge"]:not(a), [class*="status"]:not(a), [class*="label"]:not(label):not(a), [class*="chip"]:not(a)').first();
        if (badgeEl.length > 0) {
            fields['badge'] = classSelectorFor(badgeEl, ['badge', 'status', 'label', 'chip'], '[class*="badge"]');
            usedNames.add('badge');
        }
    }
    // Description/body text (a <p>, or failing that any longer text block)
    if (!usedNames.has('description')) {
        // First, look for a <p> anywhere inside the item with more than 10 characters of text.
        const pEl = item.find('p').first();
        if (pEl.length > 0 && pEl.text().trim().length > 10) {
            fields['description'] = 'p';
            usedNames.add('description');
        }
        else {
            // Fallback: first descendant with more than 20 characters of text
            // that is not (and does not contain) another field's element.
            const skippedTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'img', 'a', 'time', 'p'];
            item.find('*').each((_i, el) => {
                if (el.type !== 'tag') {
                    return undefined;
                }
                const tag = el.tagName.toLowerCase();
                if (skippedTags.includes(tag)) {
                    return undefined;
                }
                const $el = $(el);
                if ($el.find('h1, h2, h3, h4, h5, h6, img').length > 0) {
                    return undefined; // Skip containers of other fields
                }
                if ($el.text().trim().length <= 20) {
                    return undefined;
                }
                fields['description'] = classSelectorFor($el, [], tag);
                usedNames.add('description');
                return false; // first hit wins — stop iterating
            });
        }
    }
    return fields;
}
|
|
303
|
+
//# sourceMappingURL=list-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list-extractor.js","sourceRoot":"","sources":["../../src/extraction/list-extractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,IAAI,EAAiC,MAAM,SAAS,CAAC;AAG9D,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAElD;;GAEG;AACH,MAAM,UAAU,OAAO,CACrB,IAAY,EACZ,iBAAyB,EACzB,OAAqB;IAErB,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,SAAS,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,kBAAkB,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC/E,IAAI,CAAC,YAAY;QAAE,OAAO,EAAE,CAAC;IAE7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,gBAAgB,CAAC,IAAI,EAAE,iBAAiB,EAAE,YAAY,CAAC,CAAC;IAE1F,IAAI,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,GAAG,iBAAiB,IAAI,YAAY,EAAE,EAAE,MAAM,CAAC,CAAC;IAE/E,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;QACnB,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,IAAY,EACZ,iBAAyB,EACzB,YAAqB;IAErB,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,SAAS,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,qBAAqB,GAAG,YAAY,IAAI,kBAAkB,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC/E,IAAI,CAAC,qBAAqB;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC;IAChE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,OAAO,WAAW,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,QAAiB;IACzD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,IAAI,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;IAC7C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAC1B,IAA
Y,EACZ,iBAAyB,EACzB,YAAqB;IAErB,OAAO,OAAO,CAAC,IAAI,EAAE,iBAAiB,EAAE,EAAE,YAAY,EAAE,YAAY,EAAE,CAAC,CAAC;AAC1E,CAAC;AAED,qDAAqD;AAErD,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,KAAK,CAAC,OAAO,CAAC,uCAAuC,EAAE,MAAM,CAAC,CAAC;AACxE,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,CAAa,EAAE,SAA2B;IACpE,MAAM,QAAQ,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC;IACtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,+CAA+C;IAC/C,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QACvB,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;YAAE,OAAO;QAC9B,MAAM,GAAG,GAAI,EAA0B,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAC9D,MAAM,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACtD,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,yDAAyD;IACzD,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;YACtB,SAAS,GAAG,KAAK,CAAC;YAClB,YAAY,GAAG,QAAQ,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,kCAAkC;IAClC,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,MAAM,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,EAAE,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QAChC,MAAM,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,IAAI,EAAE,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;IAClC,CAAC;IAED,OAAO,YAAY,CAAC,CAAC,CAAC,KAAK,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AACnD,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAClB,CAAa,EACb,IAAsB;IAEtB,MAAM,MAAM,GAA8B,EAAE,CAAC;IAC7C,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpC,MAAM,OAAO,GAAI,IAAI,CAAC,CAAC,CAA0B,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;IAE1E,8DAA8D;IAC9D,IAAI,OAAO,KAAK,GAAG,EAAE,CAAC;QACpB,MAAM,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC;QACzB,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAE,uCAAuC;QAC9D,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACtB,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,uDAAuD;IACvD,IAAI,CAAC,SAAS,CAA
C,GAAG,CAAC,OAAO,CAAC,IAAI,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACnE,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;QACrB,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,iEAAiE;IACjE,MAAM,YAAY,GAA2B;QAC3C,MAAM,EAAE,OAAO;QACf,UAAU,EAAE,OAAO;QACnB,OAAO,EAAE,OAAO;QAChB,aAAa,EAAE,aAAa;QAC5B,OAAO,EAAE,OAAO;QAChB,KAAK,EAAE,MAAM;QACb,eAAe,EAAE,MAAM;QACvB,aAAa,EAAE,MAAM;KACtB,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;QAC7D,IAAI,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,SAAS;QACvC,MAAM,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;QACrD,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAE9B,MAAM,GAAG,GAAI,EAAE,CAAC,CAAC,CAA0B,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAC1E,IAAI,SAAS,KAAK,OAAO,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;YAC3C,MAAM,CAAC,SAAS,CAAC,GAAG,cAAc,IAAI,QAAQ,CAAC;QACjD,CAAC;aAAM,IAAI,SAAS,KAAK,MAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;YAC/C,MAAM,CAAC,SAAS,CAAC,GAAG,cAAc,IAAI,SAAS,CAAC;QAClD,CAAC;aAAM,IAAI,SAAS,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;YAClD,MAAM,CAAC,SAAS,CAAC,GAAG,EAAE,QAAQ,EAAE,cAAc,IAAI,IAAI,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC;QAClF,CAAC;aAAM,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACnC,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,SAAS,CAAC,GAAG,EAAE,QAAQ,EAAE,cAAc,IAAI,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;YACjF,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,SAAS,CAAC,GAAG,cAAc,IAAI,IAAI,CAAC;YAC7C,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,SAAS,CAAC,GAAG,cAAc,IAAI,IAAI,CAAC;QAC7C,CAAC;QACD,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC3B,CAAC;IAED,mCAAmC;IAEnC,6BAA6B;IAC7B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC,KAAK,EAAE,CAAC;QAC5D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,GAAG,GAAI,OAAO,CAAC,CAAC,CAA0B,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,IAAI,CAAC;YACjF,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC;YACtB,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B
,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,OAAO,CAAC,GAAG,SAAS,CAAC;YAC5B,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC;QACpC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,CAAC,MAAM,CAAC,GAAG,QAAQ,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;gBAClD,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC;gBACtB,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,sEAAsE;IACtE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC,KAAK,EAAE,CAAC;QACpE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,kBAAkB,CAAC;YAC/D,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;aAAM,CAAC;YACN,iEAAiE;YACjE,MAAM,UAAU,GAAG,uCAAuC,CAAC;YAC3D,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;gBAC7B,IAAI,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC;oBAAE,OAAO;gBACnC,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;oBAAE,OAAO;gBAC9B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClB,sEAAsE;gBACtE,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,GAAG,CAAC;oBAAE,OAAO;gBACtC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC9C,MAAM,GAAG,GAAI,EAA0B,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;oBAC9D,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC/C,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;oBAChD,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAE
D,+BAA+B;IAC/B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC,KAAK,EAAE,CAAC;QAC3E,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,GAAG,GAAI,MAAM,CAAC,CAAC,CAA0B,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;YACxE,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;gBACnB,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC;YAC/D,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAClD,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC;YAC/D,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,qGAAqG,CAAC,CAAC,KAAK,EAAE,CAAC;QAC1I,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAChD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,EAAE,eAAe,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC;YAC7E,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAC9C,IAAI,SAAS,EAAE,CAAC;oBACd,MAAM,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;gBAC3E,CAAC;qBAAM,CAAC;oBACN,MAAM,GAAG,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBACpD,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,mBAAmB,CAAC;gBACnE,CAAC;YACH,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,+GAA+G,CAAC,CAAC,KAAK,EAAE,CAAC;QACnJ,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,kBAAkB,CAAC;YAC/D,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,CAAC
,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,oEAAoE;QACpE,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC;QACnC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACpD,MAAM,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;YAC5B,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;QAC/B,CAAC;QAED,+EAA+E;QAC/E,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;gBAC7B,IAAI,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC;oBAAE,OAAO;gBACzC,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;oBAAE,OAAO;gBAC9B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClB,MAAM,GAAG,GAAI,EAA0B,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;gBAC9D,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,OAAO;gBACxF,IAAI,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC,MAAM,GAAG,CAAC;oBAAE,OAAO,CAAC,kCAAkC;gBAClG,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBACrB,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC/C,MAAM,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;oBACtD,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured metadata extraction from HTML.
|
|
3
|
+
*
|
|
4
|
+
* Extracts JSON-LD, OpenGraph, Twitter Card, microdata (itemprop/itemscope),
|
|
5
|
+
* canonical URLs, favicons, and RSS/Atom feeds.
|
|
6
|
+
*/
|
|
7
|
+
import type { MetaData, FeedLink, ExtractedRecord } from './types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Extract all structured metadata from HTML.
|
|
10
|
+
*/
|
|
11
|
+
export declare function extractMeta(html: string): MetaData;
|
|
12
|
+
/**
|
|
13
|
+
* Extract JSON-LD structured data.
|
|
14
|
+
*/
|
|
15
|
+
export declare function extractJsonLd(html: string): object[];
|
|
16
|
+
/**
|
|
17
|
+
* Extract OpenGraph meta tags.
|
|
18
|
+
*/
|
|
19
|
+
export declare function extractOpenGraph(html: string): Record<string, string>;
|
|
20
|
+
/**
|
|
21
|
+
* Extract Twitter Card meta tags.
|
|
22
|
+
*/
|
|
23
|
+
export declare function extractTwitterCard(html: string): Record<string, string>;
|
|
24
|
+
/**
|
|
25
|
+
* Extract microdata (itemprop/itemscope) from HTML.
|
|
26
|
+
*/
|
|
27
|
+
export declare function extractMicrodata(html: string): ExtractedRecord[];
|
|
28
|
+
/**
|
|
29
|
+
* Extract RSS/Atom feed links.
|
|
30
|
+
*/
|
|
31
|
+
export declare function extractFeeds(html: string): FeedLink[];
|
|
32
|
+
/**
|
|
33
|
+
* Extract the canonical URL.
|
|
34
|
+
*/
|
|
35
|
+
export declare function extractCanonical(html: string): string | null;
|
|
36
|
+
/**
|
|
37
|
+
* Extract the favicon URL.
|
|
38
|
+
*/
|
|
39
|
+
export declare function extractFavicon(html: string): string | null;
|
|
40
|
+
//# sourceMappingURL=meta-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"meta-extractor.d.ts","sourceRoot":"","sources":["../../src/extraction/meta-extractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEtE;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,CA8DlD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAoBpD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAarE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAavE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,eAAe,EAAE,CA8ChE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE,CAuBrD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAI5D;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAI1D"}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured metadata extraction from HTML.
|
|
3
|
+
*
|
|
4
|
+
* Extracts JSON-LD, OpenGraph, Twitter Card, microdata (itemprop/itemscope),
|
|
5
|
+
* canonical URLs, favicons, and RSS/Atom feeds.
|
|
6
|
+
*/
|
|
7
|
+
import { load } from 'cheerio';
|
|
8
|
+
/**
 * Gather all structured metadata from an HTML document in one call.
 *
 * The result combines the `<title>` text, the canonical and favicon link
 * targets, every `<meta>` tag grouped by its key prefix, and the output of
 * `extractJsonLd`, `extractMicrodata` and `extractFeeds` run on the same markup.
 *
 * @param {string} html - HTML markup to inspect.
 * @returns {object} Object with `title`, `description`, `canonical`, `og`,
 *   `twitter`, `jsonLd`, `microdata`, `feeds`, `favicon` and `other` fields.
 */
export function extractMeta(html) {
    const $ = load(html);
    // `href` of the first element matching `selector`, or null when there is
    // no match or the attribute is missing.
    const firstHref = (selector) => {
        const match = $(selector).first();
        return match.length > 0 ? (match.attr('href') ?? null) : null;
    };
    const pageTitle = $('title').first();
    const meta = {
        // An empty or whitespace-only title is reported as null.
        title: pageTitle.length > 0 ? (pageTitle.text().trim() || null) : null,
        description: null,
        canonical: firstHref('link[rel="canonical"]'),
        og: {},
        twitter: {},
        jsonLd: [],
        microdata: [],
        feeds: [],
        favicon: firstHref('link[rel="icon"], link[rel="shortcut icon"]'),
        other: {},
    };
    $('meta').each((_index, node) => {
        const tag = $(node);
        // `name` wins over `property` whenever the `name` attribute is present.
        const key = tag.attr('name') ?? tag.attr('property') ?? '';
        const value = tag.attr('content') ?? '';
        if (key === '' || value === '') {
            return;
        }
        if (key === 'description') {
            meta.description = value;
            return;
        }
        if (key.startsWith('og:')) {
            // Stored without the "og:" prefix.
            meta.og[key.slice(3)] = value;
            return;
        }
        if (key.startsWith('twitter:')) {
            // Stored without the "twitter:" prefix.
            meta.twitter[key.slice(8)] = value;
            return;
        }
        meta.other[key] = value;
    });
    meta.jsonLd = extractJsonLd(html);
    meta.microdata = extractMicrodata(html);
    meta.feeds = extractFeeds(html);
    return meta;
}
|
|
68
|
+
/**
 * Extract JSON-LD structured data.
 *
 * Reads every `<script type="application/ld+json">` element and collects the
 * objects it contains: a top-level object contributes itself, a top-level
 * array contributes each of its object entries. Empty scripts, scripts that
 * fail to parse, and non-object values (strings, numbers, booleans, null) are
 * skipped.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {object[]} The collected JSON-LD objects, in document order.
 */
export function extractJsonLd(html) {
    const $ = load(html);
    const results = [];
    $('script[type="application/ld+json"]').each((_i, el) => {
        const text = $(el).html();
        if (!text) {
            return;
        }
        let parsed;
        try {
            parsed = JSON.parse(text);
        }
        catch {
            // Skip malformed JSON-LD
            return;
        }
        // Apply the same object check to array entries as to a single
        // top-level value, so primitives and null never reach the result.
        // Pushing one entry at a time also avoids spreading a very large
        // array into a single call.
        const candidates = Array.isArray(parsed) ? parsed : [parsed];
        for (const candidate of candidates) {
            if (typeof candidate === 'object' && candidate !== null) {
                results.push(candidate);
            }
        }
    });
    return results;
}
|
|
93
|
+
/**
 * Extract OpenGraph meta tags.
 *
 * Collects `<meta>` elements whose `property` attribute starts with "og:".
 * Elements that carry the key in `name` instead are accepted as well, so the
 * result agrees with the `og` map built by `extractMeta`, which reads either
 * attribute. Tags with an empty or missing `content` are ignored; when a key
 * appears more than once the last occurrence wins.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {Record<string, string>} Map from key (without the "og:" prefix)
 *   to the tag's `content` value.
 */
export function extractOpenGraph(html) {
    const $ = load(html);
    const og = {};
    $('meta[property^="og:"], meta[name^="og:"]').each((_i, el) => {
        const $el = $(el);
        // Prefer `property` (the attribute the original selector matched) and
        // fall back to `name` only when `property` is not an og: key.
        const key = [$el.attr('property'), $el.attr('name')]
            .find((candidate) => candidate?.startsWith('og:'));
        const content = $el.attr('content') ?? '';
        if (key && content) {
            og[key.slice(3)] = content;
        }
    });
    return og;
}
|
|
108
|
+
/**
 * Extract Twitter Card meta tags.
 *
 * Collects `<meta>` elements whose `name` attribute starts with "twitter:".
 * Elements that carry the key in `property` instead are accepted as well, so
 * the result agrees with the `twitter` map built by `extractMeta`, which reads
 * either attribute. Tags with an empty or missing `content` are ignored; when
 * a key appears more than once the last occurrence wins.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {Record<string, string>} Map from key (without the "twitter:"
 *   prefix) to the tag's `content` value.
 */
export function extractTwitterCard(html) {
    const $ = load(html);
    const twitter = {};
    $('meta[name^="twitter:"], meta[property^="twitter:"]').each((_i, el) => {
        const $el = $(el);
        // Prefer `name` (the attribute the original selector matched) and
        // fall back to `property` only when `name` is not a twitter: key.
        const key = [$el.attr('name'), $el.attr('property')]
            .find((candidate) => candidate?.startsWith('twitter:'));
        const content = $el.attr('content') ?? '';
        if (key && content) {
            twitter[key.slice(8)] = content;
        }
    });
    return twitter;
}
|
|
123
|
+
/**
 * Extract microdata (itemprop/itemscope) from HTML.
 *
 * Produces one record per outermost `[itemscope]` element. The record gets an
 * `@type` key when the element has a non-empty `itemtype`, plus one key per
 * `[itemprop]` descendant whose nearest `[itemscope]` (the descendant itself
 * included) is that outermost element. A repeated property name keeps the
 * last value seen. Records that end up with no keys are omitted.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {object[]} Records in document order; each property value is a
 *   string or null.
 */
export function extractMicrodata(html) {
    const $ = load(html);
    // Resolve a property's value: a tag-specific attribute for meta, a, link,
    // img and time; trimmed text (null when empty) for everything else.
    const readValue = (node, wrapped) => {
        const trimmedText = () => wrapped.text().trim() || null;
        switch (node.tagName?.toLowerCase() ?? '') {
            case 'meta':
                return wrapped.attr('content') ?? null;
            case 'a':
            case 'link':
                return wrapped.attr('href') ?? null;
            case 'img':
                return wrapped.attr('src') ?? null;
            case 'time':
                // Fall back to the visible text when `datetime` is absent.
                return wrapped.attr('datetime') ?? trimmedText();
            default:
                return trimmedText();
        }
    };
    const items = [];
    $('[itemscope]').each((_i, scopeNode) => {
        const scope = $(scopeNode);
        // Only outermost scopes produce a record.
        if (scope.parents('[itemscope]').length !== 0) {
            return;
        }
        const item = {};
        const type = scope.attr('itemtype');
        if (type) {
            item['@type'] = type;
        }
        scope.find('[itemprop]').each((_j, propNode) => {
            const prop = $(propNode);
            // Ignore properties whose nearest scope is not this element.
            const owner = prop.closest('[itemscope]');
            if (owner.length > 0 && owner[0] !== scopeNode) {
                return;
            }
            const key = prop.attr('itemprop') ?? '';
            if (!key) {
                return;
            }
            item[key] = readValue(propNode, prop);
        });
        if (Object.keys(item).length > 0) {
            items.push(item);
        }
    });
    return items;
}
|
|
173
|
+
/**
 * Extract RSS/Atom feed links.
 *
 * Looks at every `<link rel="alternate">` that has a non-empty `href` and
 * keeps those whose `type` attribute mentions "rss" or "atom". The type is
 * compared case-insensitively because media type names are case-insensitive
 * (for example `application/RSS+XML`). Other alternate links are skipped.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {Array<{type: string, href: string, title: (string|null)}>} Feed
 *   descriptors in document order; `type` is 'rss' or 'atom', and `title` is
 *   null when the link has no `title` attribute.
 */
export function extractFeeds(html) {
    const $ = load(html);
    const feeds = [];
    $('link[rel="alternate"]').each((_i, el) => {
        const $el = $(el);
        const href = $el.attr('href');
        if (!href) {
            return;
        }
        const type = ($el.attr('type') ?? '').toLowerCase();
        let feedType;
        if (type.includes('rss')) {
            feedType = 'rss';
        }
        else if (type.includes('atom')) {
            feedType = 'atom';
        }
        else {
            return; // Skip non-feed alternate links
        }
        feeds.push({
            type: feedType,
            href,
            title: $el.attr('title') ?? null,
        });
    });
    return feeds;
}
|
|
200
|
+
/**
 * Extract the canonical URL.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {string|null} The `href` of the first `<link rel="canonical">`, or
 *   null when no such link exists or it has no `href` attribute.
 */
export function extractCanonical(html) {
    const link = load(html)('link[rel="canonical"]').first();
    if (link.length === 0) {
        return null;
    }
    return link.attr('href') ?? null;
}
|
|
208
|
+
/**
 * Extract the favicon URL.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {string|null} The `href` of the first `<link>` whose `rel` is
 *   "icon" or "shortcut icon", or null when no such link exists or it has no
 *   `href` attribute.
 */
export function extractFavicon(html) {
    const link = load(html)('link[rel="icon"], link[rel="shortcut icon"]').first();
    if (link.length === 0) {
        return null;
    }
    return link.attr('href') ?? null;
}
|
|
216
|
+
//# sourceMappingURL=meta-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"meta-extractor.js","sourceRoot":"","sources":["../../src/extraction/meta-extractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAG/B;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,MAAM,MAAM,GAAa;QACvB,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,IAAI;QACjB,SAAS,EAAE,IAAI;QACf,EAAE,EAAE,EAAE;QACN,OAAO,EAAE,EAAE;QACX,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,EAAE;QACb,KAAK,EAAE,EAAE;QACT,OAAO,EAAE,IAAI;QACb,KAAK,EAAE,EAAE;KACV,CAAC;IAEF,QAAQ;IACR,MAAM,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;IACnC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;IAC/C,CAAC;IAED,YAAY;IACZ,MAAM,WAAW,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAC;IACvD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,MAAM,CAAC,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;IACtD,CAAC;IAED,UAAU;IACV,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3E,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,MAAM,CAAC,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;IAClD,CAAC;IAED,YAAY;IACZ,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QACxB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;QAC5D,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAE9B,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;YAC3B,MAAM,CAAC,WAAW,GAAG,OAAO,CAAC;QAC/B,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACvC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC;QAC/B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,UAAU;IACV,MAAM,CAAC,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IAEpC,YAAY;IACZ,MAAM,CAAC,SAAS,GAAG,gBAAgB,
CAAC,IAAI,CAAC,CAAC;IAE1C,QAAQ;IACR,MAAM,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,CAAC,CAAC,oCAAoC,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QACtD,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1B,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAChC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YAC1B,CAAC;iBAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,EAAE,CAAC;IAEtC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QACzC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,IAAI,IAAI,OAAO,EAAE,CAAC;YACpB,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QAC9B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,OAAO,GAA2B,EAAE,CAAC;IAE3C,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QAC1C,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,IAAI,IAAI,OAAO,EAAE,CAAC;YACpB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,CAAC,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,
EAAE,EAAE,EAAE,EAAE;QAC/B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,kDAAkD;QAClD,IAAI,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO;QAElD,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACtC,IAAI,QAAQ;YAAE,MAAM,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC;QAEzC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;YACvC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YACtB,+CAA+C;YAC/C,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;YAClD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,KAAK,EAAE;gBAAE,OAAO;YAE9D,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,sCAAsC;YACtC,IAAI,KAAK,GAAkB,IAAI,CAAC;YAChC,MAAM,GAAG,GAAI,IAA6B,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YACxE,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;gBACnB,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC;YACxC,CAAC;iBAAM,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;gBACzC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC;YACrC,CAAC;iBAAM,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;gBACzB,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;YACpC,CAAC;iBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;gBAC1B,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;YAClE,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;YACtC,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE;QACzC,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9B,IAAI,CAAC,IAAI;YAAE,OAAO;
QAElB,IAAI,QAAQ,GAAqB,SAAS,CAAC;QAC3C,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,QAAQ,GAAG,KAAK,CAAC;aACtC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,QAAQ,GAAG,MAAM,CAAC;;YAC7C,OAAO,CAAC,gCAAgC;QAE7C,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,QAAQ;YACd,IAAI;YACJ,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI;SACjC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAC;IAC9C,OAAO,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC1D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,KAAK,EAAE,CAAC;IACpE,OAAO,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC1D,CAAC"}
|