@mcp-b/smart-dom-reader 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle-string.d.ts +4 -2
- package/dist/bundle-string.d.ts.map +1 -0
- package/dist/bundle-string.js +13 -7
- package/dist/bundle-string.js.map +1 -1
- package/dist/index.d.ts +388 -383
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1564 -1776
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
package/dist/index.d.ts
CHANGED
|
@@ -1,378 +1,382 @@
|
|
|
1
|
+
//#region src/types.d.ts
|
|
1
2
|
type ExtractionMode = 'interactive' | 'full' | 'structure' | 'content';
|
|
2
3
|
interface ElementSelector {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
css: string;
|
|
5
|
+
xpath: string;
|
|
6
|
+
textBased?: string;
|
|
7
|
+
dataTestId?: string;
|
|
8
|
+
ariaLabel?: string;
|
|
9
|
+
candidates?: ElementSelectorCandidate[];
|
|
9
10
|
}
|
|
10
11
|
interface ElementSelectorCandidate {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
type: 'id' | 'data-testid' | 'role-aria' | 'name' | 'class-path' | 'css-path' | 'xpath' | 'text';
|
|
13
|
+
value: string;
|
|
14
|
+
score: number;
|
|
14
15
|
}
|
|
15
16
|
interface ElementContext {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
nearestForm?: string;
|
|
18
|
+
nearestSection?: string;
|
|
19
|
+
nearestMain?: string;
|
|
20
|
+
nearestNav?: string;
|
|
21
|
+
parentChain: string[];
|
|
21
22
|
}
|
|
22
23
|
interface ElementInteraction {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
click?: boolean;
|
|
25
|
+
change?: boolean;
|
|
26
|
+
submit?: boolean;
|
|
27
|
+
nav?: boolean;
|
|
28
|
+
disabled?: boolean;
|
|
29
|
+
hidden?: boolean;
|
|
30
|
+
role?: string;
|
|
31
|
+
form?: string;
|
|
31
32
|
}
|
|
32
33
|
interface ExtractedElement {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
34
|
+
tag: string;
|
|
35
|
+
text: string;
|
|
36
|
+
selector: ElementSelector;
|
|
37
|
+
attributes: Record<string, string>;
|
|
38
|
+
context: ElementContext;
|
|
39
|
+
interaction: ElementInteraction;
|
|
40
|
+
children?: ExtractedElement[];
|
|
40
41
|
}
|
|
41
42
|
interface FormInfo {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
43
|
+
selector: string;
|
|
44
|
+
action?: string;
|
|
45
|
+
method?: string;
|
|
46
|
+
inputs: ExtractedElement[];
|
|
47
|
+
buttons: ExtractedElement[];
|
|
47
48
|
}
|
|
48
49
|
interface PageLandmarks {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
50
|
+
navigation: string[];
|
|
51
|
+
main: string[];
|
|
52
|
+
forms: string[];
|
|
53
|
+
headers: string[];
|
|
54
|
+
footers: string[];
|
|
55
|
+
articles: string[];
|
|
56
|
+
sections: string[];
|
|
56
57
|
}
|
|
57
58
|
interface PageState {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
59
|
+
url: string;
|
|
60
|
+
title: string;
|
|
61
|
+
hasErrors: boolean;
|
|
62
|
+
isLoading: boolean;
|
|
63
|
+
hasModals: boolean;
|
|
64
|
+
hasFocus?: string;
|
|
64
65
|
}
|
|
65
66
|
interface SmartDOMResult {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
67
|
+
mode: ExtractionMode;
|
|
68
|
+
timestamp: number;
|
|
69
|
+
page: PageState;
|
|
70
|
+
landmarks: PageLandmarks;
|
|
71
|
+
interactive: {
|
|
72
|
+
buttons: ExtractedElement[];
|
|
73
|
+
links: ExtractedElement[];
|
|
74
|
+
inputs: ExtractedElement[];
|
|
75
|
+
forms: FormInfo[];
|
|
76
|
+
clickable: ExtractedElement[];
|
|
77
|
+
};
|
|
78
|
+
semantic?: {
|
|
79
|
+
headings: ExtractedElement[];
|
|
80
|
+
images: ExtractedElement[];
|
|
81
|
+
tables: ExtractedElement[];
|
|
82
|
+
lists: ExtractedElement[];
|
|
83
|
+
articles: ExtractedElement[];
|
|
84
|
+
};
|
|
85
|
+
metadata?: {
|
|
86
|
+
totalElements: number;
|
|
87
|
+
extractedElements: number;
|
|
88
|
+
mainContent?: string;
|
|
89
|
+
language?: string;
|
|
90
|
+
};
|
|
90
91
|
}
|
|
91
92
|
interface FilterOptions {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
93
|
+
includeSelectors?: string[];
|
|
94
|
+
excludeSelectors?: string[];
|
|
95
|
+
textContains?: string[];
|
|
96
|
+
textMatches?: RegExp[];
|
|
97
|
+
hasAttributes?: string[];
|
|
98
|
+
attributeValues?: Record<string, string | RegExp>;
|
|
99
|
+
tags?: string[];
|
|
100
|
+
interactionTypes?: Array<keyof ElementInteraction>;
|
|
101
|
+
withinSelectors?: string[];
|
|
102
|
+
nearText?: string;
|
|
102
103
|
}
|
|
103
104
|
interface ExtractionOptions {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
105
|
+
mode: ExtractionMode;
|
|
106
|
+
maxDepth?: number;
|
|
107
|
+
includeHidden?: boolean;
|
|
108
|
+
includeShadowDOM?: boolean;
|
|
109
|
+
includeIframes?: boolean;
|
|
110
|
+
viewportOnly?: boolean;
|
|
111
|
+
mainContentOnly?: boolean;
|
|
112
|
+
customSelectors?: string[];
|
|
113
|
+
attributeTruncateLength?: number;
|
|
114
|
+
dataAttributeTruncateLength?: number;
|
|
115
|
+
textTruncateLength?: number;
|
|
116
|
+
filter?: FilterOptions;
|
|
116
117
|
}
|
|
117
118
|
interface RegionInfo {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
119
|
+
selector: string;
|
|
120
|
+
label?: string;
|
|
121
|
+
role?: string;
|
|
122
|
+
interactiveCount: number;
|
|
123
|
+
hasForm?: boolean;
|
|
124
|
+
hasList?: boolean;
|
|
125
|
+
hasTable?: boolean;
|
|
126
|
+
hasMedia?: boolean;
|
|
127
|
+
buttonCount?: number;
|
|
128
|
+
linkCount?: number;
|
|
129
|
+
inputCount?: number;
|
|
130
|
+
textPreview?: string;
|
|
130
131
|
}
|
|
131
132
|
interface StructuralOverview {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
133
|
+
regions: {
|
|
134
|
+
header?: RegionInfo;
|
|
135
|
+
navigation?: RegionInfo[];
|
|
136
|
+
main?: RegionInfo;
|
|
137
|
+
sidebar?: RegionInfo[];
|
|
138
|
+
footer?: RegionInfo;
|
|
139
|
+
modals?: RegionInfo[];
|
|
140
|
+
sections?: RegionInfo[];
|
|
141
|
+
};
|
|
142
|
+
forms: Array<{
|
|
143
|
+
selector: string;
|
|
144
|
+
location: string;
|
|
145
|
+
inputCount: number;
|
|
146
|
+
purpose?: string;
|
|
147
|
+
}>;
|
|
148
|
+
summary: {
|
|
149
|
+
totalInteractive: number;
|
|
150
|
+
totalForms: number;
|
|
151
|
+
totalSections: number;
|
|
152
|
+
hasModals: boolean;
|
|
153
|
+
hasErrors: boolean;
|
|
154
|
+
isLoading: boolean;
|
|
155
|
+
mainContentSelector?: string;
|
|
156
|
+
};
|
|
157
|
+
suggestions?: string[];
|
|
157
158
|
}
|
|
158
159
|
interface ContentExtractionOptions {
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
160
|
+
includeHeadings?: boolean;
|
|
161
|
+
includeLists?: boolean;
|
|
162
|
+
includeTables?: boolean;
|
|
163
|
+
includeMedia?: boolean;
|
|
164
|
+
preserveFormatting?: boolean;
|
|
165
|
+
maxTextLength?: number;
|
|
165
166
|
}
|
|
166
167
|
interface ExtractedContent {
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
}>;
|
|
173
|
-
paragraphs?: string[];
|
|
174
|
-
lists?: Array<{
|
|
175
|
-
type: 'ul' | 'ol';
|
|
176
|
-
items: string[];
|
|
177
|
-
}>;
|
|
178
|
-
};
|
|
179
|
-
tables?: Array<{
|
|
180
|
-
headers: string[];
|
|
181
|
-
rows: string[][];
|
|
168
|
+
selector: string;
|
|
169
|
+
text: {
|
|
170
|
+
headings?: Array<{
|
|
171
|
+
level: number;
|
|
172
|
+
text: string;
|
|
182
173
|
}>;
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
174
|
+
paragraphs?: string[];
|
|
175
|
+
lists?: Array<{
|
|
176
|
+
type: 'ul' | 'ol';
|
|
177
|
+
items: string[];
|
|
187
178
|
}>;
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
179
|
+
};
|
|
180
|
+
tables?: Array<{
|
|
181
|
+
headers: string[];
|
|
182
|
+
rows: string[][];
|
|
183
|
+
}>;
|
|
184
|
+
media?: Array<{
|
|
185
|
+
type: 'img' | 'video' | 'audio';
|
|
186
|
+
alt?: string;
|
|
187
|
+
src?: string;
|
|
188
|
+
}>;
|
|
189
|
+
metadata: {
|
|
190
|
+
wordCount: number;
|
|
191
|
+
hasInteractive: boolean;
|
|
192
|
+
};
|
|
192
193
|
}
|
|
193
|
-
|
|
194
|
+
//#endregion
|
|
195
|
+
//#region src/markdown-formatter.d.ts
|
|
194
196
|
type MarkdownDetailLevel = 'summary' | 'region' | 'deep';
|
|
195
197
|
interface MarkdownFormatOptions {
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
detail?: MarkdownDetailLevel;
|
|
199
|
+
maxTextLength?: number;
|
|
200
|
+
maxElements?: number;
|
|
199
201
|
}
|
|
200
202
|
type PageMeta = {
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
title?: string;
|
|
204
|
+
url?: string;
|
|
203
205
|
};
|
|
204
206
|
declare class MarkdownFormatter {
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
207
|
+
static structure(overview: StructuralOverview, _opts?: MarkdownFormatOptions, meta?: PageMeta): string;
|
|
208
|
+
static region(result: SmartDOMResult, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
|
|
209
|
+
static content(content: ExtractedContent, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
|
|
208
210
|
}
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
* Type-safe interface for the stateless bundle extraction API
|
|
212
|
-
*/
|
|
213
|
-
|
|
211
|
+
//#endregion
|
|
212
|
+
//#region src/bundle-types.d.ts
|
|
214
213
|
type ExtractionMethod = 'extractStructure' | 'extractRegion' | 'extractContent' | 'extractInteractive' | 'extractFull';
|
|
215
214
|
interface BaseExtractionArgs {
|
|
216
|
-
|
|
217
|
-
|
|
215
|
+
frameSelector?: string;
|
|
216
|
+
formatOptions?: MarkdownFormatOptions;
|
|
218
217
|
}
|
|
219
218
|
interface ExtractStructureArgs extends BaseExtractionArgs {
|
|
220
|
-
|
|
219
|
+
selector?: string;
|
|
221
220
|
}
|
|
222
221
|
interface ExtractRegionArgs extends BaseExtractionArgs {
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
222
|
+
selector: string;
|
|
223
|
+
mode?: 'interactive' | 'full';
|
|
224
|
+
options?: Partial<ExtractionOptions>;
|
|
226
225
|
}
|
|
227
226
|
interface ExtractContentArgs extends BaseExtractionArgs {
|
|
228
|
-
|
|
229
|
-
|
|
227
|
+
selector: string;
|
|
228
|
+
options?: ContentExtractionOptions;
|
|
230
229
|
}
|
|
231
230
|
interface ExtractInteractiveArgs extends BaseExtractionArgs {
|
|
232
|
-
|
|
233
|
-
|
|
231
|
+
selector?: string;
|
|
232
|
+
options?: Partial<ExtractionOptions>;
|
|
234
233
|
}
|
|
235
234
|
interface ExtractFullArgs extends BaseExtractionArgs {
|
|
236
|
-
|
|
237
|
-
|
|
235
|
+
selector?: string;
|
|
236
|
+
options?: Partial<ExtractionOptions>;
|
|
238
237
|
}
|
|
239
238
|
type ExtractionArgs = {
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
239
|
+
extractStructure: ExtractStructureArgs;
|
|
240
|
+
extractRegion: ExtractRegionArgs;
|
|
241
|
+
extractContent: ExtractContentArgs;
|
|
242
|
+
extractInteractive: ExtractInteractiveArgs;
|
|
243
|
+
extractFull: ExtractFullArgs;
|
|
245
244
|
};
|
|
246
245
|
interface ExtractionError {
|
|
247
|
-
|
|
246
|
+
error: string;
|
|
248
247
|
}
|
|
249
248
|
type ExtractionResult = string | ExtractionError;
|
|
250
249
|
interface SmartDOMReaderBundle {
|
|
251
|
-
|
|
250
|
+
executeExtraction<M extends ExtractionMethod>(method: M, args: ExtractionArgs[M]): ExtractionResult;
|
|
252
251
|
}
|
|
253
252
|
declare global {
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
253
|
+
interface Window {
|
|
254
|
+
SmartDOMReaderBundle: SmartDOMReaderBundle;
|
|
255
|
+
}
|
|
257
256
|
}
|
|
258
|
-
|
|
257
|
+
//# sourceMappingURL=bundle-types.d.ts.map
|
|
258
|
+
//#endregion
|
|
259
|
+
//#region src/content-detection.d.ts
|
|
259
260
|
declare class ContentDetection {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
261
|
+
/**
|
|
262
|
+
* Find the main content area of a page
|
|
263
|
+
* Inspired by dom-to-semantic-markdown's approach
|
|
264
|
+
*/
|
|
265
|
+
static findMainContent(doc: Document): Element;
|
|
266
|
+
/**
|
|
267
|
+
* Detect main content using scoring algorithm
|
|
268
|
+
*/
|
|
269
|
+
private static detectMainContent;
|
|
270
|
+
/**
|
|
271
|
+
* Collect content candidates
|
|
272
|
+
*/
|
|
273
|
+
private static collectCandidates;
|
|
274
|
+
/**
|
|
275
|
+
* Calculate content score for an element
|
|
276
|
+
*/
|
|
277
|
+
static calculateContentScore(element: Element): number;
|
|
278
|
+
/**
|
|
279
|
+
* Calculate link density in an element
|
|
280
|
+
*/
|
|
281
|
+
private static calculateLinkDensity;
|
|
282
|
+
/**
|
|
283
|
+
* Check if an element is likely navigation
|
|
284
|
+
*/
|
|
285
|
+
static isNavigation(element: Element): boolean;
|
|
286
|
+
/**
|
|
287
|
+
* Check if element is likely supplementary content
|
|
288
|
+
*/
|
|
289
|
+
static isSupplementary(element: Element): boolean;
|
|
290
|
+
/**
|
|
291
|
+
* Detect page landmarks
|
|
292
|
+
*/
|
|
293
|
+
static detectLandmarks(doc: Document): Record<string, Element[]>;
|
|
293
294
|
}
|
|
294
|
-
|
|
295
|
+
//#endregion
|
|
296
|
+
//#region src/progressive.d.ts
|
|
295
297
|
type SmartDomReaderCtor = new (options?: Partial<ExtractionOptions>) => SmartDOMReader;
|
|
296
298
|
declare class ProgressiveExtractor {
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
299
|
+
/**
|
|
300
|
+
* Step 1: Extract high-level structural overview
|
|
301
|
+
* This provides a "map" of the page for the AI to understand structure
|
|
302
|
+
*/
|
|
303
|
+
static extractStructure(root: Document | Element): StructuralOverview;
|
|
304
|
+
/**
|
|
305
|
+
* Step 2: Extract detailed information from a specific region
|
|
306
|
+
*/
|
|
307
|
+
static extractRegion(selector: string, doc: Document, options?: Partial<ExtractionOptions>, smartDomReaderCtor?: SmartDomReaderCtor): SmartDOMResult | null;
|
|
308
|
+
/**
|
|
309
|
+
* Step 3: Extract readable content from a region
|
|
310
|
+
*/
|
|
311
|
+
static extractContent(selector: string, doc: Document, options?: ContentExtractionOptions): ExtractedContent | null;
|
|
312
|
+
/**
|
|
313
|
+
* Analyze a region and extract summary information
|
|
314
|
+
*/
|
|
315
|
+
private static analyzeRegion;
|
|
316
|
+
/**
|
|
317
|
+
* Extract overview of forms on the page
|
|
318
|
+
*/
|
|
319
|
+
private static extractFormOverview;
|
|
320
|
+
/**
|
|
321
|
+
* Calculate summary statistics
|
|
322
|
+
*/
|
|
323
|
+
private static calculateSummary;
|
|
324
|
+
/**
|
|
325
|
+
* Generate AI-friendly suggestions
|
|
326
|
+
*/
|
|
327
|
+
private static generateSuggestions;
|
|
328
|
+
/**
|
|
329
|
+
* Get text content with optional truncation
|
|
330
|
+
*/
|
|
331
|
+
private static getTextContent;
|
|
330
332
|
}
|
|
331
|
-
|
|
333
|
+
//#endregion
|
|
334
|
+
//#region src/selectors.d.ts
|
|
332
335
|
declare class SelectorGenerator {
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
336
|
+
/**
|
|
337
|
+
* Generate multiple selector strategies for an element
|
|
338
|
+
*/
|
|
339
|
+
static generateSelectors(element: Element): ElementSelector;
|
|
340
|
+
/**
|
|
341
|
+
* Generate a unique CSS selector for an element
|
|
342
|
+
*/
|
|
343
|
+
private static generateCSSSelector;
|
|
344
|
+
/**
|
|
345
|
+
* Generate XPath for an element
|
|
346
|
+
*/
|
|
347
|
+
private static generateXPath;
|
|
348
|
+
/**
|
|
349
|
+
* Generate a text-based selector for buttons and links
|
|
350
|
+
*/
|
|
351
|
+
private static generateTextBasedSelector;
|
|
352
|
+
/**
|
|
353
|
+
* Get data-testid or similar attributes
|
|
354
|
+
*/
|
|
355
|
+
private static getDataTestId;
|
|
356
|
+
/**
|
|
357
|
+
* Check if an ID is unique in the document
|
|
358
|
+
*/
|
|
359
|
+
private static isUniqueId;
|
|
360
|
+
/**
|
|
361
|
+
* Check if a selector is unique within a container
|
|
362
|
+
*/
|
|
363
|
+
private static isUniqueSelector;
|
|
364
|
+
private static isUniqueSelectorSafe;
|
|
365
|
+
/**
|
|
366
|
+
* Get meaningful classes (filtering out utility classes)
|
|
367
|
+
*/
|
|
368
|
+
private static getMeaningfulClasses;
|
|
369
|
+
/**
|
|
370
|
+
* Optimize the selector path by removing unnecessary parts
|
|
371
|
+
*/
|
|
372
|
+
private static optimizePath;
|
|
373
|
+
/**
|
|
374
|
+
* Get a human-readable path description
|
|
375
|
+
*/
|
|
376
|
+
static getContextPath(element: Element): string[];
|
|
374
377
|
}
|
|
375
|
-
|
|
378
|
+
//#endregion
|
|
379
|
+
//#region src/index.d.ts
|
|
376
380
|
/**
|
|
377
381
|
* Smart DOM Reader - Full Extraction Approach
|
|
378
382
|
*
|
|
@@ -387,81 +391,82 @@ declare class SelectorGenerator {
|
|
|
387
391
|
* - Returns complete structured data immediately
|
|
388
392
|
*/
|
|
389
393
|
declare class SmartDOMReader {
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
394
|
+
private options;
|
|
395
|
+
constructor(options?: Partial<ExtractionOptions>);
|
|
396
|
+
/**
|
|
397
|
+
* Main extraction method - extracts all data in one pass
|
|
398
|
+
* @param rootElement The document or element to extract from
|
|
399
|
+
* @param runtimeOptions Options to override constructor options
|
|
400
|
+
*/
|
|
401
|
+
extract(rootElement?: Document | Element, runtimeOptions?: Partial<ExtractionOptions>): SmartDOMResult;
|
|
402
|
+
/**
|
|
403
|
+
* Extract page state information
|
|
404
|
+
*/
|
|
405
|
+
private extractPageState;
|
|
406
|
+
/**
|
|
407
|
+
* Extract page landmarks
|
|
408
|
+
*/
|
|
409
|
+
private extractLandmarks;
|
|
410
|
+
/**
|
|
411
|
+
* Convert elements to selector strings
|
|
412
|
+
*/
|
|
413
|
+
private elementsToSelectors;
|
|
414
|
+
/**
|
|
415
|
+
* Extract interactive elements
|
|
416
|
+
*/
|
|
417
|
+
private extractInteractiveElements;
|
|
418
|
+
/**
|
|
419
|
+
* Extract form information
|
|
420
|
+
*/
|
|
421
|
+
private extractForms;
|
|
422
|
+
/**
|
|
423
|
+
* Extract semantic elements (full mode only)
|
|
424
|
+
*/
|
|
425
|
+
private extractSemanticElements;
|
|
426
|
+
/**
|
|
427
|
+
* Extract metadata
|
|
428
|
+
*/
|
|
429
|
+
private extractMetadata;
|
|
430
|
+
/**
|
|
431
|
+
* Check if element should be included based on options
|
|
432
|
+
*/
|
|
433
|
+
private shouldIncludeElement;
|
|
434
|
+
/**
|
|
435
|
+
* Detect errors on the page
|
|
436
|
+
*/
|
|
437
|
+
private detectErrors;
|
|
438
|
+
/**
|
|
439
|
+
* Detect if page is loading
|
|
440
|
+
*/
|
|
441
|
+
private detectLoading;
|
|
442
|
+
/**
|
|
443
|
+
* Detect modal dialogs
|
|
444
|
+
*/
|
|
445
|
+
private detectModals;
|
|
446
|
+
/**
|
|
447
|
+
* Get currently focused element
|
|
448
|
+
*/
|
|
449
|
+
private getFocusedElement;
|
|
450
|
+
/**
|
|
451
|
+
* Quick extraction for interactive elements only
|
|
452
|
+
* @param doc The document to extract from
|
|
453
|
+
* @param options Extraction options
|
|
454
|
+
*/
|
|
455
|
+
static extractInteractive(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
|
|
456
|
+
/**
|
|
457
|
+
* Quick extraction for full content
|
|
458
|
+
* @param doc The document to extract from
|
|
459
|
+
* @param options Extraction options
|
|
460
|
+
*/
|
|
461
|
+
static extractFull(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
|
|
462
|
+
/**
|
|
463
|
+
* Extract from a specific element
|
|
464
|
+
* @param element The element to extract from
|
|
465
|
+
* @param mode The extraction mode
|
|
466
|
+
* @param options Additional options
|
|
467
|
+
*/
|
|
468
|
+
static extractFromElement(element: Element, mode?: ExtractionMode, options?: Partial<ExtractionOptions>): SmartDOMResult;
|
|
465
469
|
}
|
|
466
|
-
|
|
467
|
-
export { ContentDetection,
|
|
470
|
+
//#endregion
|
|
471
|
+
export { ContentDetection, ContentExtractionOptions, ElementContext, ElementInteraction, ElementSelector, ElementSelectorCandidate, type ExtractContentArgs, type ExtractFullArgs, type ExtractInteractiveArgs, type ExtractRegionArgs, type ExtractStructureArgs, ExtractedContent, ExtractedElement, type ExtractionArgs, type ExtractionMethod, ExtractionMode, ExtractionOptions, type ExtractionResult, FilterOptions, FormInfo, type MarkdownFormatOptions, MarkdownFormatter, PageLandmarks, PageState, ProgressiveExtractor, RegionInfo, SelectorGenerator, SmartDOMReader, SmartDOMReader as default, SmartDOMResult, StructuralOverview };
|
|
472
|
+
//# sourceMappingURL=index.d.ts.map
|