@mcp-b/smart-dom-reader 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,378 +1,382 @@
1
+ //#region src/types.d.ts
1
2
  type ExtractionMode = 'interactive' | 'full' | 'structure' | 'content';
2
3
  interface ElementSelector {
3
- css: string;
4
- xpath: string;
5
- textBased?: string;
6
- dataTestId?: string;
7
- ariaLabel?: string;
8
- candidates?: ElementSelectorCandidate[];
4
+ css: string;
5
+ xpath: string;
6
+ textBased?: string;
7
+ dataTestId?: string;
8
+ ariaLabel?: string;
9
+ candidates?: ElementSelectorCandidate[];
9
10
  }
10
11
  interface ElementSelectorCandidate {
11
- type: 'id' | 'data-testid' | 'role-aria' | 'name' | 'class-path' | 'css-path' | 'xpath' | 'text';
12
- value: string;
13
- score: number;
12
+ type: 'id' | 'data-testid' | 'role-aria' | 'name' | 'class-path' | 'css-path' | 'xpath' | 'text';
13
+ value: string;
14
+ score: number;
14
15
  }
15
16
  interface ElementContext {
16
- nearestForm?: string;
17
- nearestSection?: string;
18
- nearestMain?: string;
19
- nearestNav?: string;
20
- parentChain: string[];
17
+ nearestForm?: string;
18
+ nearestSection?: string;
19
+ nearestMain?: string;
20
+ nearestNav?: string;
21
+ parentChain: string[];
21
22
  }
22
23
  interface ElementInteraction {
23
- click?: boolean;
24
- change?: boolean;
25
- submit?: boolean;
26
- nav?: boolean;
27
- disabled?: boolean;
28
- hidden?: boolean;
29
- role?: string;
30
- form?: string;
24
+ click?: boolean;
25
+ change?: boolean;
26
+ submit?: boolean;
27
+ nav?: boolean;
28
+ disabled?: boolean;
29
+ hidden?: boolean;
30
+ role?: string;
31
+ form?: string;
31
32
  }
32
33
  interface ExtractedElement {
33
- tag: string;
34
- text: string;
35
- selector: ElementSelector;
36
- attributes: Record<string, string>;
37
- context: ElementContext;
38
- interaction: ElementInteraction;
39
- children?: ExtractedElement[];
34
+ tag: string;
35
+ text: string;
36
+ selector: ElementSelector;
37
+ attributes: Record<string, string>;
38
+ context: ElementContext;
39
+ interaction: ElementInteraction;
40
+ children?: ExtractedElement[];
40
41
  }
41
42
  interface FormInfo {
42
- selector: string;
43
- action?: string;
44
- method?: string;
45
- inputs: ExtractedElement[];
46
- buttons: ExtractedElement[];
43
+ selector: string;
44
+ action?: string;
45
+ method?: string;
46
+ inputs: ExtractedElement[];
47
+ buttons: ExtractedElement[];
47
48
  }
48
49
  interface PageLandmarks {
49
- navigation: string[];
50
- main: string[];
51
- forms: string[];
52
- headers: string[];
53
- footers: string[];
54
- articles: string[];
55
- sections: string[];
50
+ navigation: string[];
51
+ main: string[];
52
+ forms: string[];
53
+ headers: string[];
54
+ footers: string[];
55
+ articles: string[];
56
+ sections: string[];
56
57
  }
57
58
  interface PageState {
58
- url: string;
59
- title: string;
60
- hasErrors: boolean;
61
- isLoading: boolean;
62
- hasModals: boolean;
63
- hasFocus?: string;
59
+ url: string;
60
+ title: string;
61
+ hasErrors: boolean;
62
+ isLoading: boolean;
63
+ hasModals: boolean;
64
+ hasFocus?: string;
64
65
  }
65
66
  interface SmartDOMResult {
66
- mode: ExtractionMode;
67
- timestamp: number;
68
- page: PageState;
69
- landmarks: PageLandmarks;
70
- interactive: {
71
- buttons: ExtractedElement[];
72
- links: ExtractedElement[];
73
- inputs: ExtractedElement[];
74
- forms: FormInfo[];
75
- clickable: ExtractedElement[];
76
- };
77
- semantic?: {
78
- headings: ExtractedElement[];
79
- images: ExtractedElement[];
80
- tables: ExtractedElement[];
81
- lists: ExtractedElement[];
82
- articles: ExtractedElement[];
83
- };
84
- metadata?: {
85
- totalElements: number;
86
- extractedElements: number;
87
- mainContent?: string;
88
- language?: string;
89
- };
67
+ mode: ExtractionMode;
68
+ timestamp: number;
69
+ page: PageState;
70
+ landmarks: PageLandmarks;
71
+ interactive: {
72
+ buttons: ExtractedElement[];
73
+ links: ExtractedElement[];
74
+ inputs: ExtractedElement[];
75
+ forms: FormInfo[];
76
+ clickable: ExtractedElement[];
77
+ };
78
+ semantic?: {
79
+ headings: ExtractedElement[];
80
+ images: ExtractedElement[];
81
+ tables: ExtractedElement[];
82
+ lists: ExtractedElement[];
83
+ articles: ExtractedElement[];
84
+ };
85
+ metadata?: {
86
+ totalElements: number;
87
+ extractedElements: number;
88
+ mainContent?: string;
89
+ language?: string;
90
+ };
90
91
  }
91
92
  interface FilterOptions {
92
- includeSelectors?: string[];
93
- excludeSelectors?: string[];
94
- textContains?: string[];
95
- textMatches?: RegExp[];
96
- hasAttributes?: string[];
97
- attributeValues?: Record<string, string | RegExp>;
98
- tags?: string[];
99
- interactionTypes?: Array<keyof ElementInteraction>;
100
- withinSelectors?: string[];
101
- nearText?: string;
93
+ includeSelectors?: string[];
94
+ excludeSelectors?: string[];
95
+ textContains?: string[];
96
+ textMatches?: RegExp[];
97
+ hasAttributes?: string[];
98
+ attributeValues?: Record<string, string | RegExp>;
99
+ tags?: string[];
100
+ interactionTypes?: Array<keyof ElementInteraction>;
101
+ withinSelectors?: string[];
102
+ nearText?: string;
102
103
  }
103
104
  interface ExtractionOptions {
104
- mode: ExtractionMode;
105
- maxDepth?: number;
106
- includeHidden?: boolean;
107
- includeShadowDOM?: boolean;
108
- includeIframes?: boolean;
109
- viewportOnly?: boolean;
110
- mainContentOnly?: boolean;
111
- customSelectors?: string[];
112
- attributeTruncateLength?: number;
113
- dataAttributeTruncateLength?: number;
114
- textTruncateLength?: number;
115
- filter?: FilterOptions;
105
+ mode: ExtractionMode;
106
+ maxDepth?: number;
107
+ includeHidden?: boolean;
108
+ includeShadowDOM?: boolean;
109
+ includeIframes?: boolean;
110
+ viewportOnly?: boolean;
111
+ mainContentOnly?: boolean;
112
+ customSelectors?: string[];
113
+ attributeTruncateLength?: number;
114
+ dataAttributeTruncateLength?: number;
115
+ textTruncateLength?: number;
116
+ filter?: FilterOptions;
116
117
  }
117
118
  interface RegionInfo {
118
- selector: string;
119
- label?: string;
120
- role?: string;
121
- interactiveCount: number;
122
- hasForm?: boolean;
123
- hasList?: boolean;
124
- hasTable?: boolean;
125
- hasMedia?: boolean;
126
- buttonCount?: number;
127
- linkCount?: number;
128
- inputCount?: number;
129
- textPreview?: string;
119
+ selector: string;
120
+ label?: string;
121
+ role?: string;
122
+ interactiveCount: number;
123
+ hasForm?: boolean;
124
+ hasList?: boolean;
125
+ hasTable?: boolean;
126
+ hasMedia?: boolean;
127
+ buttonCount?: number;
128
+ linkCount?: number;
129
+ inputCount?: number;
130
+ textPreview?: string;
130
131
  }
131
132
  interface StructuralOverview {
132
- regions: {
133
- header?: RegionInfo;
134
- navigation?: RegionInfo[];
135
- main?: RegionInfo;
136
- sidebar?: RegionInfo[];
137
- footer?: RegionInfo;
138
- modals?: RegionInfo[];
139
- sections?: RegionInfo[];
140
- };
141
- forms: Array<{
142
- selector: string;
143
- location: string;
144
- inputCount: number;
145
- purpose?: string;
146
- }>;
147
- summary: {
148
- totalInteractive: number;
149
- totalForms: number;
150
- totalSections: number;
151
- hasModals: boolean;
152
- hasErrors: boolean;
153
- isLoading: boolean;
154
- mainContentSelector?: string;
155
- };
156
- suggestions?: string[];
133
+ regions: {
134
+ header?: RegionInfo;
135
+ navigation?: RegionInfo[];
136
+ main?: RegionInfo;
137
+ sidebar?: RegionInfo[];
138
+ footer?: RegionInfo;
139
+ modals?: RegionInfo[];
140
+ sections?: RegionInfo[];
141
+ };
142
+ forms: Array<{
143
+ selector: string;
144
+ location: string;
145
+ inputCount: number;
146
+ purpose?: string;
147
+ }>;
148
+ summary: {
149
+ totalInteractive: number;
150
+ totalForms: number;
151
+ totalSections: number;
152
+ hasModals: boolean;
153
+ hasErrors: boolean;
154
+ isLoading: boolean;
155
+ mainContentSelector?: string;
156
+ };
157
+ suggestions?: string[];
157
158
  }
158
159
  interface ContentExtractionOptions {
159
- includeHeadings?: boolean;
160
- includeLists?: boolean;
161
- includeTables?: boolean;
162
- includeMedia?: boolean;
163
- preserveFormatting?: boolean;
164
- maxTextLength?: number;
160
+ includeHeadings?: boolean;
161
+ includeLists?: boolean;
162
+ includeTables?: boolean;
163
+ includeMedia?: boolean;
164
+ preserveFormatting?: boolean;
165
+ maxTextLength?: number;
165
166
  }
166
167
  interface ExtractedContent {
167
- selector: string;
168
- text: {
169
- headings?: Array<{
170
- level: number;
171
- text: string;
172
- }>;
173
- paragraphs?: string[];
174
- lists?: Array<{
175
- type: 'ul' | 'ol';
176
- items: string[];
177
- }>;
178
- };
179
- tables?: Array<{
180
- headers: string[];
181
- rows: string[][];
168
+ selector: string;
169
+ text: {
170
+ headings?: Array<{
171
+ level: number;
172
+ text: string;
182
173
  }>;
183
- media?: Array<{
184
- type: 'img' | 'video' | 'audio';
185
- alt?: string;
186
- src?: string;
174
+ paragraphs?: string[];
175
+ lists?: Array<{
176
+ type: 'ul' | 'ol';
177
+ items: string[];
187
178
  }>;
188
- metadata: {
189
- wordCount: number;
190
- hasInteractive: boolean;
191
- };
179
+ };
180
+ tables?: Array<{
181
+ headers: string[];
182
+ rows: string[][];
183
+ }>;
184
+ media?: Array<{
185
+ type: 'img' | 'video' | 'audio';
186
+ alt?: string;
187
+ src?: string;
188
+ }>;
189
+ metadata: {
190
+ wordCount: number;
191
+ hasInteractive: boolean;
192
+ };
192
193
  }
193
-
194
+ //#endregion
195
+ //#region src/markdown-formatter.d.ts
194
196
  type MarkdownDetailLevel = 'summary' | 'region' | 'deep';
195
197
  interface MarkdownFormatOptions {
196
- detail?: MarkdownDetailLevel;
197
- maxTextLength?: number;
198
- maxElements?: number;
198
+ detail?: MarkdownDetailLevel;
199
+ maxTextLength?: number;
200
+ maxElements?: number;
199
201
  }
200
202
  type PageMeta = {
201
- title?: string;
202
- url?: string;
203
+ title?: string;
204
+ url?: string;
203
205
  };
204
206
  declare class MarkdownFormatter {
205
- static structure(overview: StructuralOverview, _opts?: MarkdownFormatOptions, meta?: PageMeta): string;
206
- static region(result: SmartDOMResult, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
207
- static content(content: ExtractedContent, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
207
+ static structure(overview: StructuralOverview, _opts?: MarkdownFormatOptions, meta?: PageMeta): string;
208
+ static region(result: SmartDOMResult, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
209
+ static content(content: ExtractedContent, opts?: MarkdownFormatOptions, meta?: PageMeta): string;
208
210
  }
209
-
210
- /**
211
- * Type-safe interface for the stateless bundle extraction API
212
- */
213
-
211
+ //#endregion
212
+ //#region src/bundle-types.d.ts
214
213
  type ExtractionMethod = 'extractStructure' | 'extractRegion' | 'extractContent' | 'extractInteractive' | 'extractFull';
215
214
  interface BaseExtractionArgs {
216
- frameSelector?: string;
217
- formatOptions?: MarkdownFormatOptions;
215
+ frameSelector?: string;
216
+ formatOptions?: MarkdownFormatOptions;
218
217
  }
219
218
  interface ExtractStructureArgs extends BaseExtractionArgs {
220
- selector?: string;
219
+ selector?: string;
221
220
  }
222
221
  interface ExtractRegionArgs extends BaseExtractionArgs {
223
- selector: string;
224
- mode?: 'interactive' | 'full';
225
- options?: Partial<ExtractionOptions>;
222
+ selector: string;
223
+ mode?: 'interactive' | 'full';
224
+ options?: Partial<ExtractionOptions>;
226
225
  }
227
226
  interface ExtractContentArgs extends BaseExtractionArgs {
228
- selector: string;
229
- options?: ContentExtractionOptions;
227
+ selector: string;
228
+ options?: ContentExtractionOptions;
230
229
  }
231
230
  interface ExtractInteractiveArgs extends BaseExtractionArgs {
232
- selector?: string;
233
- options?: Partial<ExtractionOptions>;
231
+ selector?: string;
232
+ options?: Partial<ExtractionOptions>;
234
233
  }
235
234
  interface ExtractFullArgs extends BaseExtractionArgs {
236
- selector?: string;
237
- options?: Partial<ExtractionOptions>;
235
+ selector?: string;
236
+ options?: Partial<ExtractionOptions>;
238
237
  }
239
238
  type ExtractionArgs = {
240
- extractStructure: ExtractStructureArgs;
241
- extractRegion: ExtractRegionArgs;
242
- extractContent: ExtractContentArgs;
243
- extractInteractive: ExtractInteractiveArgs;
244
- extractFull: ExtractFullArgs;
239
+ extractStructure: ExtractStructureArgs;
240
+ extractRegion: ExtractRegionArgs;
241
+ extractContent: ExtractContentArgs;
242
+ extractInteractive: ExtractInteractiveArgs;
243
+ extractFull: ExtractFullArgs;
245
244
  };
246
245
  interface ExtractionError {
247
- error: string;
246
+ error: string;
248
247
  }
249
248
  type ExtractionResult = string | ExtractionError;
250
249
  interface SmartDOMReaderBundle {
251
- executeExtraction<M extends ExtractionMethod>(method: M, args: ExtractionArgs[M]): ExtractionResult;
250
+ executeExtraction<M extends ExtractionMethod>(method: M, args: ExtractionArgs[M]): ExtractionResult;
252
251
  }
253
252
  declare global {
254
- interface Window {
255
- SmartDOMReaderBundle: SmartDOMReaderBundle;
256
- }
253
+ interface Window {
254
+ SmartDOMReaderBundle: SmartDOMReaderBundle;
255
+ }
257
256
  }
258
-
257
+ //# sourceMappingURL=bundle-types.d.ts.map
258
+ //#endregion
259
+ //#region src/content-detection.d.ts
259
260
  declare class ContentDetection {
260
- /**
261
- * Find the main content area of a page
262
- * Inspired by dom-to-semantic-markdown's approach
263
- */
264
- static findMainContent(doc: Document): Element;
265
- /**
266
- * Detect main content using scoring algorithm
267
- */
268
- private static detectMainContent;
269
- /**
270
- * Collect content candidates
271
- */
272
- private static collectCandidates;
273
- /**
274
- * Calculate content score for an element
275
- */
276
- static calculateContentScore(element: Element): number;
277
- /**
278
- * Calculate link density in an element
279
- */
280
- private static calculateLinkDensity;
281
- /**
282
- * Check if an element is likely navigation
283
- */
284
- static isNavigation(element: Element): boolean;
285
- /**
286
- * Check if element is likely supplementary content
287
- */
288
- static isSupplementary(element: Element): boolean;
289
- /**
290
- * Detect page landmarks
291
- */
292
- static detectLandmarks(doc: Document): Record<string, Element[]>;
261
+ /**
262
+ * Find the main content area of a page
263
+ * Inspired by dom-to-semantic-markdown's approach
264
+ */
265
+ static findMainContent(doc: Document): Element;
266
+ /**
267
+ * Detect main content using scoring algorithm
268
+ */
269
+ private static detectMainContent;
270
+ /**
271
+ * Collect content candidates
272
+ */
273
+ private static collectCandidates;
274
+ /**
275
+ * Calculate content score for an element
276
+ */
277
+ static calculateContentScore(element: Element): number;
278
+ /**
279
+ * Calculate link density in an element
280
+ */
281
+ private static calculateLinkDensity;
282
+ /**
283
+ * Check if an element is likely navigation
284
+ */
285
+ static isNavigation(element: Element): boolean;
286
+ /**
287
+ * Check if element is likely supplementary content
288
+ */
289
+ static isSupplementary(element: Element): boolean;
290
+ /**
291
+ * Detect page landmarks
292
+ */
293
+ static detectLandmarks(doc: Document): Record<string, Element[]>;
293
294
  }
294
-
295
+ //#endregion
296
+ //#region src/progressive.d.ts
295
297
  type SmartDomReaderCtor = new (options?: Partial<ExtractionOptions>) => SmartDOMReader;
296
298
  declare class ProgressiveExtractor {
297
- /**
298
- * Step 1: Extract high-level structural overview
299
- * This provides a "map" of the page for the AI to understand structure
300
- */
301
- static extractStructure(root: Document | Element): StructuralOverview;
302
- /**
303
- * Step 2: Extract detailed information from a specific region
304
- */
305
- static extractRegion(selector: string, doc: Document, options?: Partial<ExtractionOptions>, smartDomReaderCtor?: SmartDomReaderCtor): SmartDOMResult | null;
306
- /**
307
- * Step 3: Extract readable content from a region
308
- */
309
- static extractContent(selector: string, doc: Document, options?: ContentExtractionOptions): ExtractedContent | null;
310
- /**
311
- * Analyze a region and extract summary information
312
- */
313
- private static analyzeRegion;
314
- /**
315
- * Extract overview of forms on the page
316
- */
317
- private static extractFormOverview;
318
- /**
319
- * Calculate summary statistics
320
- */
321
- private static calculateSummary;
322
- /**
323
- * Generate AI-friendly suggestions
324
- */
325
- private static generateSuggestions;
326
- /**
327
- * Get text content with optional truncation
328
- */
329
- private static getTextContent;
299
+ /**
300
+ * Step 1: Extract high-level structural overview
301
+ * This provides a "map" of the page for the AI to understand structure
302
+ */
303
+ static extractStructure(root: Document | Element): StructuralOverview;
304
+ /**
305
+ * Step 2: Extract detailed information from a specific region
306
+ */
307
+ static extractRegion(selector: string, doc: Document, options?: Partial<ExtractionOptions>, smartDomReaderCtor?: SmartDomReaderCtor): SmartDOMResult | null;
308
+ /**
309
+ * Step 3: Extract readable content from a region
310
+ */
311
+ static extractContent(selector: string, doc: Document, options?: ContentExtractionOptions): ExtractedContent | null;
312
+ /**
313
+ * Analyze a region and extract summary information
314
+ */
315
+ private static analyzeRegion;
316
+ /**
317
+ * Extract overview of forms on the page
318
+ */
319
+ private static extractFormOverview;
320
+ /**
321
+ * Calculate summary statistics
322
+ */
323
+ private static calculateSummary;
324
+ /**
325
+ * Generate AI-friendly suggestions
326
+ */
327
+ private static generateSuggestions;
328
+ /**
329
+ * Get text content with optional truncation
330
+ */
331
+ private static getTextContent;
330
332
  }
331
-
333
+ //#endregion
334
+ //#region src/selectors.d.ts
332
335
  declare class SelectorGenerator {
333
- /**
334
- * Generate multiple selector strategies for an element
335
- */
336
- static generateSelectors(element: Element): ElementSelector;
337
- /**
338
- * Generate a unique CSS selector for an element
339
- */
340
- private static generateCSSSelector;
341
- /**
342
- * Generate XPath for an element
343
- */
344
- private static generateXPath;
345
- /**
346
- * Generate a text-based selector for buttons and links
347
- */
348
- private static generateTextBasedSelector;
349
- /**
350
- * Get data-testid or similar attributes
351
- */
352
- private static getDataTestId;
353
- /**
354
- * Check if an ID is unique in the document
355
- */
356
- private static isUniqueId;
357
- /**
358
- * Check if a selector is unique within a container
359
- */
360
- private static isUniqueSelector;
361
- private static isUniqueSelectorSafe;
362
- /**
363
- * Get meaningful classes (filtering out utility classes)
364
- */
365
- private static getMeaningfulClasses;
366
- /**
367
- * Optimize the selector path by removing unnecessary parts
368
- */
369
- private static optimizePath;
370
- /**
371
- * Get a human-readable path description
372
- */
373
- static getContextPath(element: Element): string[];
336
+ /**
337
+ * Generate multiple selector strategies for an element
338
+ */
339
+ static generateSelectors(element: Element): ElementSelector;
340
+ /**
341
+ * Generate a unique CSS selector for an element
342
+ */
343
+ private static generateCSSSelector;
344
+ /**
345
+ * Generate XPath for an element
346
+ */
347
+ private static generateXPath;
348
+ /**
349
+ * Generate a text-based selector for buttons and links
350
+ */
351
+ private static generateTextBasedSelector;
352
+ /**
353
+ * Get data-testid or similar attributes
354
+ */
355
+ private static getDataTestId;
356
+ /**
357
+ * Check if an ID is unique in the document
358
+ */
359
+ private static isUniqueId;
360
+ /**
361
+ * Check if a selector is unique within a container
362
+ */
363
+ private static isUniqueSelector;
364
+ private static isUniqueSelectorSafe;
365
+ /**
366
+ * Get meaningful classes (filtering out utility classes)
367
+ */
368
+ private static getMeaningfulClasses;
369
+ /**
370
+ * Optimize the selector path by removing unnecessary parts
371
+ */
372
+ private static optimizePath;
373
+ /**
374
+ * Get a human-readable path description
375
+ */
376
+ static getContextPath(element: Element): string[];
374
377
  }
375
-
378
+ //#endregion
379
+ //#region src/index.d.ts
376
380
  /**
377
381
  * Smart DOM Reader - Full Extraction Approach
378
382
  *
@@ -387,81 +391,82 @@ declare class SelectorGenerator {
387
391
  * - Returns complete structured data immediately
388
392
  */
389
393
  declare class SmartDOMReader {
390
- private options;
391
- constructor(options?: Partial<ExtractionOptions>);
392
- /**
393
- * Main extraction method - extracts all data in one pass
394
- * @param rootElement The document or element to extract from
395
- * @param runtimeOptions Options to override constructor options
396
- */
397
- extract(rootElement?: Document | Element, runtimeOptions?: Partial<ExtractionOptions>): SmartDOMResult;
398
- /**
399
- * Extract page state information
400
- */
401
- private extractPageState;
402
- /**
403
- * Extract page landmarks
404
- */
405
- private extractLandmarks;
406
- /**
407
- * Convert elements to selector strings
408
- */
409
- private elementsToSelectors;
410
- /**
411
- * Extract interactive elements
412
- */
413
- private extractInteractiveElements;
414
- /**
415
- * Extract form information
416
- */
417
- private extractForms;
418
- /**
419
- * Extract semantic elements (full mode only)
420
- */
421
- private extractSemanticElements;
422
- /**
423
- * Extract metadata
424
- */
425
- private extractMetadata;
426
- /**
427
- * Check if element should be included based on options
428
- */
429
- private shouldIncludeElement;
430
- /**
431
- * Detect errors on the page
432
- */
433
- private detectErrors;
434
- /**
435
- * Detect if page is loading
436
- */
437
- private detectLoading;
438
- /**
439
- * Detect modal dialogs
440
- */
441
- private detectModals;
442
- /**
443
- * Get currently focused element
444
- */
445
- private getFocusedElement;
446
- /**
447
- * Quick extraction for interactive elements only
448
- * @param doc The document to extract from
449
- * @param options Extraction options
450
- */
451
- static extractInteractive(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
452
- /**
453
- * Quick extraction for full content
454
- * @param doc The document to extract from
455
- * @param options Extraction options
456
- */
457
- static extractFull(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
458
- /**
459
- * Extract from a specific element
460
- * @param element The element to extract from
461
- * @param mode The extraction mode
462
- * @param options Additional options
463
- */
464
- static extractFromElement(element: Element, mode?: ExtractionMode, options?: Partial<ExtractionOptions>): SmartDOMResult;
394
+ private options;
395
+ constructor(options?: Partial<ExtractionOptions>);
396
+ /**
397
+ * Main extraction method - extracts all data in one pass
398
+ * @param rootElement The document or element to extract from
399
+ * @param runtimeOptions Options to override constructor options
400
+ */
401
+ extract(rootElement?: Document | Element, runtimeOptions?: Partial<ExtractionOptions>): SmartDOMResult;
402
+ /**
403
+ * Extract page state information
404
+ */
405
+ private extractPageState;
406
+ /**
407
+ * Extract page landmarks
408
+ */
409
+ private extractLandmarks;
410
+ /**
411
+ * Convert elements to selector strings
412
+ */
413
+ private elementsToSelectors;
414
+ /**
415
+ * Extract interactive elements
416
+ */
417
+ private extractInteractiveElements;
418
+ /**
419
+ * Extract form information
420
+ */
421
+ private extractForms;
422
+ /**
423
+ * Extract semantic elements (full mode only)
424
+ */
425
+ private extractSemanticElements;
426
+ /**
427
+ * Extract metadata
428
+ */
429
+ private extractMetadata;
430
+ /**
431
+ * Check if element should be included based on options
432
+ */
433
+ private shouldIncludeElement;
434
+ /**
435
+ * Detect errors on the page
436
+ */
437
+ private detectErrors;
438
+ /**
439
+ * Detect if page is loading
440
+ */
441
+ private detectLoading;
442
+ /**
443
+ * Detect modal dialogs
444
+ */
445
+ private detectModals;
446
+ /**
447
+ * Get currently focused element
448
+ */
449
+ private getFocusedElement;
450
+ /**
451
+ * Quick extraction for interactive elements only
452
+ * @param doc The document to extract from
453
+ * @param options Extraction options
454
+ */
455
+ static extractInteractive(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
456
+ /**
457
+ * Quick extraction for full content
458
+ * @param doc The document to extract from
459
+ * @param options Extraction options
460
+ */
461
+ static extractFull(doc: Document, options?: Partial<ExtractionOptions>): SmartDOMResult;
462
+ /**
463
+ * Extract from a specific element
464
+ * @param element The element to extract from
465
+ * @param mode The extraction mode
466
+ * @param options Additional options
467
+ */
468
+ static extractFromElement(element: Element, mode?: ExtractionMode, options?: Partial<ExtractionOptions>): SmartDOMResult;
465
469
  }
466
-
467
- export { ContentDetection, type ContentExtractionOptions, type ElementContext, type ElementInteraction, type ElementSelector, type ElementSelectorCandidate, type ExtractContentArgs, type ExtractFullArgs, type ExtractInteractiveArgs, type ExtractRegionArgs, type ExtractStructureArgs, type ExtractedContent, type ExtractedElement, type ExtractionArgs, type ExtractionMethod, type ExtractionMode, type ExtractionOptions, type ExtractionResult, type FilterOptions, type FormInfo, type MarkdownFormatOptions, MarkdownFormatter, type PageLandmarks, type PageState, ProgressiveExtractor, type RegionInfo, SelectorGenerator, SmartDOMReader, type SmartDOMResult, type StructuralOverview, SmartDOMReader as default };
470
+ //#endregion
471
+ export { ContentDetection, ContentExtractionOptions, ElementContext, ElementInteraction, ElementSelector, ElementSelectorCandidate, type ExtractContentArgs, type ExtractFullArgs, type ExtractInteractiveArgs, type ExtractRegionArgs, type ExtractStructureArgs, ExtractedContent, ExtractedElement, type ExtractionArgs, type ExtractionMethod, ExtractionMode, ExtractionOptions, type ExtractionResult, FilterOptions, FormInfo, type MarkdownFormatOptions, MarkdownFormatter, PageLandmarks, PageState, ProgressiveExtractor, RegionInfo, SelectorGenerator, SmartDOMReader, SmartDOMReader as default, SmartDOMResult, StructuralOverview };
472
+ //# sourceMappingURL=index.d.ts.map