defuddle 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -15
- package/dist/constants.js +8 -0
- package/dist/constants.js.map +1 -1
- package/dist/defuddle.d.ts +11 -0
- package/dist/defuddle.js +207 -24
- package/dist/defuddle.js.map +1 -1
- package/dist/extractors/hackernews.js +6 -5
- package/dist/extractors/hackernews.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.full.js +1 -1
- package/dist/index.js +1 -1
- package/dist/markdown.js +5 -15
- package/dist/markdown.js.map +1 -1
- package/dist/scoring.d.ts +3 -2
- package/dist/scoring.js +48 -12
- package/dist/scoring.js.map +1 -1
- package/dist/standardize.js +40 -10
- package/dist/standardize.js.map +1 -1
- package/dist/types.d.ts +35 -0
- package/dist/utils/dom.d.ts +5 -0
- package/dist/utils/dom.js +14 -0
- package/dist/utils/dom.js.map +1 -1
- package/dist/utils.d.ts +2 -1
- package/dist/utils.js +6 -2
- package/dist/utils.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -133,6 +133,7 @@ Defuddle returns an object with the following properties:
|
|
|
133
133
|
| `schemaOrgData` | object | Raw schema.org data extracted from the page |
|
|
134
134
|
| `title` | string | Title of the article |
|
|
135
135
|
| `wordCount` | number | Total number of words in the extracted content |
|
|
136
|
+
| `debug` | object | Debug info including content selector and removals (when `debug: true`) |
|
|
136
137
|
|
|
137
138
|
## Bundles
|
|
138
139
|
|
|
@@ -148,29 +149,20 @@ The core bundle is recommended for most use cases. It still handles math content
|
|
|
148
149
|
|
|
149
150
|
| Option | Type | Default | Description |
|
|
150
151
|
| ------------------------ | ------- | ------- | ------------------------------------------------------------------------- |
|
|
151
|
-
| `debug` | boolean | false | Enable debug logging
|
|
152
|
+
| `debug` | boolean | false | Enable debug logging and return debug info in the response |
|
|
152
153
|
| `url` | string | | URL of the page being parsed |
|
|
153
154
|
| `markdown` | boolean | false | Convert `content` to Markdown |
|
|
154
155
|
| `separateMarkdown` | boolean | false | Keep `content` as HTML and return `contentMarkdown` as Markdown |
|
|
155
156
|
| `removeExactSelectors` | boolean | true | Remove elements matching exact selectors like ads, social buttons, etc. |
|
|
156
157
|
| `removePartialSelectors` | boolean | true | Remove elements matching partial selectors like ads, social buttons, etc. |
|
|
158
|
+
| `removeHiddenElements` | boolean | true | Remove elements hidden via CSS (display:none, visibility:hidden, etc.) |
|
|
159
|
+
| `removeLowScoring` | boolean | true | Remove non-content blocks by scoring (navigation, link lists, etc.) |
|
|
160
|
+
| `removeSmallImages` | boolean | true | Remove small images (icons, tracking pixels, etc.) |
|
|
157
161
|
| `removeImages` | boolean | false | Remove images. |
|
|
162
|
+
| `standardize` | boolean | true | Standardize HTML (footnotes, headings, code blocks, etc.) |
|
|
163
|
+
| `contentSelector` | string | | CSS selector to use as the main content element, bypassing auto-detection |
|
|
158
164
|
| `useAsync` | boolean | true | Allow async extractors to fetch from third-party APIs when no local content is available. |
|
|
159
165
|
|
|
160
|
-
### Debug mode
|
|
161
|
-
|
|
162
|
-
You can enable debug mode by passing an options object when creating a new Defuddle instance:
|
|
163
|
-
|
|
164
|
-
```typescript
|
|
165
|
-
const article = new Defuddle(document, { debug: true }).parse();
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
- More verbose console logging about the parsing process
|
|
169
|
-
- Preserves HTML class and id attributes that are normally stripped
|
|
170
|
-
- Retains all data-* attributes
|
|
171
|
-
- Skips div flattening to preserve document structure
|
|
172
|
-
|
|
173
|
-
|
|
174
166
|
## HTML standardization
|
|
175
167
|
|
|
176
168
|
Defuddle attempts to standardize HTML elements to provide a consistent input for subsequent manipulation such as conversion to Markdown.
|
|
@@ -242,3 +234,68 @@ npm run build
|
|
|
242
234
|
When using `parseAsync()`, if no content can be extracted from the local HTML, Defuddle may fetch content from third-party APIs as a fallback. This only happens when the page HTML contains no usable content (e.g. client-side rendered SPAs). You can disable this by setting `useAsync: false` in options.
|
|
243
235
|
|
|
244
236
|
- [FxTwitter API](https://github.com/FixTweet/FxTwitter) — Used to extract X (Twitter) article content, which is not available in server-rendered HTML.
|
|
237
|
+
|
|
238
|
+
## Debugging
|
|
239
|
+
|
|
240
|
+
### Debug mode
|
|
241
|
+
|
|
242
|
+
You can enable debug mode by passing an options object when creating a new Defuddle instance:
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
const result = new Defuddle(document, { debug: true }).parse();
|
|
246
|
+
|
|
247
|
+
// Access debug info
|
|
248
|
+
console.log(result.debug.contentSelector); // CSS selector path of chosen main content element
|
|
249
|
+
console.log(result.debug.removals); // Array of removed elements with reasons
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
When debug mode is enabled:
|
|
253
|
+
|
|
254
|
+
- Returns a `debug` field in the response with detailed information about content extraction
|
|
255
|
+
- More verbose console logging about the parsing process
|
|
256
|
+
- Preserves HTML class and id attributes that are normally stripped
|
|
257
|
+
- Retains all data-* attributes
|
|
258
|
+
- Skips div flattening to preserve document structure
|
|
259
|
+
|
|
260
|
+
The `debug` field contains:
|
|
261
|
+
|
|
262
|
+
| Property | Type | Description |
|
|
263
|
+
|----------|------|-------------|
|
|
264
|
+
| `contentSelector` | string | CSS selector path of the chosen main content element |
|
|
265
|
+
| `removals` | array | List of elements removed during processing |
|
|
266
|
+
|
|
267
|
+
Each removal entry contains:
|
|
268
|
+
|
|
269
|
+
| Property | Type | Description |
|
|
270
|
+
|----------|------|-------------|
|
|
271
|
+
| `step` | string | Pipeline step that removed the element (e.g. `removeLowScoring`, `removeBySelector`, `removeHiddenElements`) |
|
|
272
|
+
| `selector` | string | CSS selector or pattern that matched (for selector-based removal) |
|
|
273
|
+
| `reason` | string | Why the element was removed (e.g. `score: -20`, `display:none`) |
|
|
274
|
+
| `text` | string | First 200 characters of the removed element's text content |
|
|
275
|
+
|
|
276
|
+
### Pipeline toggles
|
|
277
|
+
|
|
278
|
+
You can disable individual pipeline steps to diagnose content extraction issues:
|
|
279
|
+
|
|
280
|
+
```typescript
|
|
281
|
+
// Skip content scoring to see if it's removing content incorrectly
|
|
282
|
+
const withoutScoring = new Defuddle(document, { removeLowScoring: false }).parse();
|
|
283
|
+
|
|
284
|
+
// Skip hidden element removal (useful for CSS sidenote layouts)
|
|
285
|
+
const withHiddenElements = new Defuddle(document, { removeHiddenElements: false }).parse();
|
|
286
|
+
|
|
287
|
+
// Skip small image removal
|
|
288
|
+
|
+
const withSmallImages = new Defuddle(document, { removeSmallImages: false }).parse();
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Content selector
|
|
292
|
+
|
|
293
|
+
Use `contentSelector` to bypass Defuddle's auto-detection and specify the main content element directly:
|
|
294
|
+
|
|
295
|
+
```typescript
|
|
296
|
+
const result = new Defuddle(document, {
|
|
297
|
+
contentSelector: 'article.post-content'
|
|
298
|
+
}).parse();
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
If the selector doesn't match any element, Defuddle falls back to auto-detection.
|
package/dist/constants.js
CHANGED
|
@@ -220,6 +220,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
220
220
|
'activitypub',
|
|
221
221
|
'actioncall',
|
|
222
222
|
'addcomment',
|
|
223
|
+
'addtoany',
|
|
223
224
|
'advert',
|
|
224
225
|
// '-ad-', howtogeek.com
|
|
225
226
|
'adlayout',
|
|
@@ -274,6 +275,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
274
275
|
'articletopics',
|
|
275
276
|
'article-topics',
|
|
276
277
|
// 'article-type',
|
|
278
|
+
'article-actions',
|
|
277
279
|
'article--lede', // The Verge
|
|
278
280
|
'articlewell',
|
|
279
281
|
'associated-people',
|
|
@@ -372,6 +374,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
372
374
|
'donate',
|
|
373
375
|
'donation',
|
|
374
376
|
'dropdown', // Ars Technica
|
|
377
|
+
'element-invisible',
|
|
375
378
|
'eletters',
|
|
376
379
|
'emailsignup',
|
|
377
380
|
'emoji-bar',
|
|
@@ -411,6 +414,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
411
414
|
'frontmatter',
|
|
412
415
|
'further-reading',
|
|
413
416
|
'fullbleedheader',
|
|
417
|
+
'gallery-count',
|
|
414
418
|
'gated-',
|
|
415
419
|
'gh-feed',
|
|
416
420
|
'gist-meta',
|
|
@@ -439,6 +443,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
439
443
|
'itemendrow',
|
|
440
444
|
'invisible',
|
|
441
445
|
'jp-no-solution',
|
|
446
|
+
'jp-relatedposts',
|
|
442
447
|
'jswarning',
|
|
443
448
|
'js-warning',
|
|
444
449
|
'jumplink',
|
|
@@ -482,6 +487,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
482
487
|
'might-like',
|
|
483
488
|
'minibio',
|
|
484
489
|
'more-about',
|
|
490
|
+
'mod-paywall',
|
|
485
491
|
'_modal',
|
|
486
492
|
'-modal',
|
|
487
493
|
'more-',
|
|
@@ -623,6 +629,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
623
629
|
'related',
|
|
624
630
|
'relevant',
|
|
625
631
|
'reversefootnote',
|
|
632
|
+
'robots-nocontent',
|
|
626
633
|
'_rss',
|
|
627
634
|
'rss-link',
|
|
628
635
|
'screen-reader-text',
|
|
@@ -730,6 +737,7 @@ exports.PARTIAL_SELECTORS = [
|
|
|
730
737
|
'u-hide',
|
|
731
738
|
'upsell',
|
|
732
739
|
'viewbottom',
|
|
740
|
+
'yarpp-related',
|
|
733
741
|
'visually-hidden',
|
|
734
742
|
'welcomebox',
|
|
735
743
|
'widget_pages',
|
package/dist/constants.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":";;;AAAA,uBAAuB;AACvB,oEAAoE;AACvD,QAAA,oBAAoB,GAAG;IACnC,OAAO;IACP,eAAe;IACf,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,kBAAkB;IAClB,kBAAkB;IAClB,OAAO;IACP,gBAAgB;IAChB,SAAS;IACT,kBAAkB;IAClB,MAAM;IACN,eAAe;IACf,UAAU;IACV,MAAM,CAAC,kCAAkC;CACzC,CAAC;AAEW,QAAA,YAAY,GAAG,GAAG,CAAC;AACnB,QAAA,cAAc,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEnH,wCAAwC;AAC3B,QAAA,iBAAiB,GAAG,IAAI,GAAG,CAAC;IACxC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC1D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,QAAQ,EAAE,YAAY,EAAE,SAAS;IACjC,SAAS,EAAE,SAAS;IACpB,YAAY;IACZ,MAAM,EAAE,UAAU;CAClB,CAAC,CAAC;AAEH,+CAA+C;AAClC,QAAA,eAAe,GAAG,IAAI,GAAG,CAAC;IACtC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO;IACjE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM;IACxF,MAAM;CACN,CAAC,CAAC;AAEH,0BAA0B;AACb,QAAA,eAAe,GAAG;IAC9B,kBAAkB;IAClB,UAAU;IACV,6BAA6B;IAC7B,OAAO;IACP,MAAM;IACN,MAAM;IAEN,MAAM;IACN,8BAA8B;IAC9B,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,eAAe;IACf,mBAAmB;IACnB,mBAAmB;IACnB,QAAQ;IACR,QAAQ;IACR,eAAe,EAAE,SAAS;IAC1B,QAAQ;IAER,WAAW;IACX,mBAAmB;IACnB,kBAAkB;IAElB,eAAe;IACf,sBAAsB;IACtB,mBAAmB;IAEnB,cAAc;IACd,QAAQ;IACR,sBAAsB;IACtB,SAAS;IACT,SAAS;IACT,SAAS;IACT,SAAS;IACT,KAAK;IACL,aAAa;IACb,aAAa;IACb,6BAA6B;IAC7B,uBAAuB;IACvB,mBAAmB;IACnB,2BAA2B;IAC3B,yBAAyB;IACzB,OAAO;IACP,6BAA6B;IAC7B,UAAU;IACV,0BAA0B;IAC1B,WAAW;IAEX,WAAW;IACX,SAAS;IACT,SAAS;IACT,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,OAAO;IACP,OAAO;IACP,aAAa;IACb,aAAa;IACb,OAAO;IACP,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,WAAW;IACX,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,aAAa;IACb,2CAA2C;IAC3C,6CAA6C;IAC7C,iBAAiB;IACjB,kBAAkB;IAClB,yCAAyC;IACzC,oBAAoB;IACpB,oBAAoB;IACpB,mBAAmB;IACnB,gBAAgB;IAChB,eAAe;IACf,eAAe;IAC
f,uBAAuB;IACvB,mBAAmB;IACnB,wBAAwB;IACxB,wBAAwB;IACxB,iBAAiB;IACjB,iBAAiB;IAEjB,SAAS;IACT,QAAQ;IAER,0BAA0B;IAC1B,QAAQ;IACR,+BAA+B;IAC/B,QAAQ;IACP,mCAAmC;IACpC,QAAQ;IACR,MAAM;IACN,QAAQ;IACR,UAAU;IACV,MAAM;IACN,8BAA8B;IAC9B,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,kBAAkB;IAClB,iBAAiB;IACjB,UAAU;IACV,4BAA4B;IAC5B,qCAAqC;IAErC,SAAS;IACT,UAAU;IACV,2CAA2C;IAC3C,+CAA+C;IAC/C,8CAA8C;IAC9C,+BAA+B;IAC/B,8BAA8B;IAC9B,SAAS;IACT,YAAY;IAEZ,UAAU;IACV,kBAAkB;IAClB,6IAA6I;IAE7I,QAAQ;IACR,kBAAkB;IAClB,OAAO;IACP,OAAO;IAEP,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IAEZ,mBAAmB;IACnB,UAAU;IACV,8BAA8B;IAC9B,6BAA6B;IAE7B,uBAAuB;IACvB,6BAA6B;IAC7B,sDAAsD;IACtD,iCAAiC;IACjC,4CAA4C;IAE5C,aAAa;IACb,mCAAmC;IAEnC,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,WAAW;IACX,UAAU;IAEV,aAAa;IACb,4BAA4B;IAC5B,wBAAwB;IAExB,QAAQ;IACR,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,MAAM;IACN,OAAO;IACP,SAAS;IACT,eAAe,EAAE,MAAM;IACvB,gCAAgC;IAChC,oDAAoD,EAAE,iBAAiB;IACvE,eAAe;IACf,gDAAgD,EAAE,gBAAgB;IAClE,+BAA+B;IAC/B,mBAAmB,EAAE,SAAS;IAC9B,uCAAuC,EAAE,SAAS;CAClD,CAAC;AAEF,iDAAiD;AACpC,QAAA,eAAe,GAAG;IAC9B,OAAO;IACP,IAAI;IACJ,WAAW;IACX,aAAa;IACb,cAAc;IACd,SAAS;IACT,SAAS;CACT,CAAC;AAEF,mDAAmD;AACnD,4CAA4C;AAC/B,QAAA,iBAAiB,GAAG;IAChC,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,QAAQ;IACT,wBAAwB;IACvB,UAAU;IACV,SAAS;IACT,cAAc;IACd,eAAe;IACf,MAAM;IACN,eAAe;IACf,oBAAoB;IACpB,WAAW;IACX,UAAU;IACV,SAAS;IACT,WAAW;IACZ,cAAc;IACb,UAAU;IACV,gBAAgB;IAChB,aAAa;IACb,gBAAgB;IAChB,gBAAgB;IAChB,gBAAgB;IAChB,wBAAwB;IACxB,gBAAgB;IAChB,kBAAkB;IAClB,cAAc;IACd,kBAAkB;IAClB,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,eAAe;IACf,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IACjB,iBAAiB;IACjB,mBAAmB;IACnB,gBAAgB;IAChB,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,eAAe;IACf,eAAe;IACf,gBAAgB;IACjB,kBAAkB;IACjB,eAAe,EAAE,YAAY;IAC7B,aAAa;IACb,mBAAmB;IACnB,YAAY;IACb,kBAAkB;IAClB,aAAa;IACZ,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,aAAa;IACb,SAAS;IACT,iBAAiB;IACjB,aAAa;IACb,qBAAqB;IAC
rB,aAAa;IACb,QAAQ;IAER,aAAa;IACb,oBAAoB;IACpB,mBAAmB;IACpB,YAAY;IACX,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,WAAW;IACX,YAAY;IACZ,WAAW;IACX,mBAAmB;IACnB,gBAAgB;IAChB,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,UAAU;IACV,gBAAgB;IAChB,mBAAmB;IACnB,MAAM;IACN,MAAM;IACN,QAAQ;IAER,SAAS;IACT,WAAW;IACX,YAAY;IACZ,WAAW;IACZ,cAAc;IACb,mBAAmB;IACnB,oBAAoB;IACpB,YAAY;IACZ,UAAU;IACV,aAAa;IACb,aAAa;IACb,cAAc;IACd,cAAc,EAAE,gBAAgB;IAChC,aAAa;IACb,UAAU;IACX,kCAAkC;IACjC,YAAY;IACZ,gBAAgB;IAChB,aAAa;IACb,iBAAiB;IACjB,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,iBAAiB;IACjB,gBAAgB;IAChB,cAAc;IACd,eAAe;IACf,SAAS;IACT,UAAU;IACV,cAAc,EAAE,YAAY;IAC5B,gBAAgB;IAChB,cAAc;IACd,aAAa;IACb,gBAAgB,EAAE,UAAU;IAC5B,iBAAiB;IACjB,aAAa;IACb,aAAa;IACb,YAAY;IACZ,cAAc;IACd,mBAAmB;IACnB,aAAa;IACb,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,eAAe,EAAE,aAAa;IAC9B,oBAAoB;IAEpB,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACX,YAAY;IACX,YAAY;IACZ,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,UAAU,EAAE,eAAe;IAE3B,UAAU;IACV,aAAa;IACb,WAAW;IACX,mBAAmB;IACnB,cAAc;IACd,mBAAmB;IACnB,kBAAkB;IAClB,YAAY;IACb,gBAAgB;IACf,aAAa;IACb,eAAe;IACf,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,eAAe;IACf,iBAAiB;IACjB,0BAA0B,EAAE,iBAAiB;IAC7C,gBAAgB;IAChB,aAAa;IAEb,UAAU;IACV,WAAW;IACX,UAAU;IACV,kBAAkB;IAClB,cAAc;IACd,UAAU;IACV,YAAY;IACZ,qBAAqB;IACrB,WAAW;IACX,cAAc;IACf,YAAY;IACX,UAAU;IACV,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,SAAS;IACT,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IAEjB,QAAQ;IACR,SAAS;IACT,WAAW;IACZ,YAAY;IACZ,YAAY;IACX,OAAO;IACP,YAAY;IAEZ,WAAW;IACX,aAAa;IACb,aAAa;IACb,gBAAgB,EAAE,YAAY;IAC/B,2BAA2B;IAC1B,WAAW;IACZ,aAAa;IACZ,gBAAgB;IAChB,YAAY;IACZ,qBAAqB;IACrB,cAAc;IACd,iBAAiB;IACjB,sBAAsB;IAEtB,UAAU;IACV,cAAc;IACd,sBAAsB;IACtB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,WAAW;IAEX,gBAAgB;IAChB,WAAW;IACX,YAAY;IACZ,UAAU;IACV,UAAU;IACV,oBAAoB;IAEpB,aAAa;IACb,cAAc;IACd,cAAc;IACf,4CAA4C;IAC3C,cAAc;IACd,QAAQ;IAER,SAAS,EAAE,QAAQ;IACnB,SAAS;IACT,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,SAAS,EAAE,YAAY;IACvB,UAAU;IACV,UAAU;IACV,gBAAgB;IAChB,aAAa;IACb,UAAU;IACV,YAAY,EAAE,MAAM;IACpB,aAAa,EAAE,MAAM;IACrB,
uBAAuB,EAAE,gBAAgB;IACzC,WAAW;IACX,UAAU;IACV,SAAS;IACT,UAAU;IACV,gBAAgB;IAChB,iBAAiB,EAAE,QAAQ;IAC3B,iBAAiB;IACjB,WAAW;IAEX,UAAU;IACV,WAAW;IACX,eAAe;IACf,OAAO;IACP,OAAO;IACR,+BAA+B;IAC9B,UAAU;IACV,YAAY;IACZ,SAAS;IACT,YAAY;IACZ,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,UAAU;IACV,aAAa;IACb,cAAc;IACd,WAAW;IACX,aAAa;IACb,gBAAgB;IAChB,kBAAkB;IAClB,eAAe;IACf,cAAc;IAEd,MAAM;IACN,MAAM;IACP,YAAY;IACZ,gBAAgB;IACf,iBAAiB;IACjB,OAAO;IACP,aAAa;IACb,kBAAkB;IACnB,iCAAiC;IAChC,aAAa;IACb,kBAAkB;IAClB,qBAAqB;IACrB,iBAAiB;IACjB,mBAAmB;IACnB,kBAAkB;IAClB,kBAAkB;IAClB,mBAAmB;IACnB,WAAW;IACX,cAAc;IACd,UAAU;IACV,SAAS;IAET,gBAAgB;IAChB,sBAAsB,EAAE,eAAe;IACvC,aAAa;IACb,cAAc;IACf,aAAa;IAEZ,UAAU;IACV,aAAa;IACb,YAAY;IACZ,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,MAAM;IACN,SAAS;IACV,iBAAiB;IAChB,aAAa;IACd,aAAa;IACZ,aAAa;IACb,QAAQ;IACR,aAAa;IACb,aAAa;IACb,gBAAgB;IAChB,aAAa;IACb,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,kBAAkB;IAClB,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,UAAU;IACV,WAAW;IACX,UAAU;IACV,iBAAiB;IACjB,UAAU;IACV,cAAc;IACd,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,UAAU;IACV,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,WAAW;IACX,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,iBAAiB;IAClB,sCAAsC;IACrC,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,kBAAkB;IAClB,SAAS;IACV,WAAW;IACV,eAAe;IACf,WAAW;IACX,WAAW;IACX,SAAS;IACT,UAAU;IACV,UAAU;IACV,cAAc;IACd,cAAc;IACd,kBAAkB;IAClB,iBAAiB,EAAE,SAAS;IAE5B,SAAS;IACT,SAAS;IACT,UAAU;IAEV,OAAO;IACP,gBAAgB;IAChB,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,WAAW;IACX,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,iBAAiB;IACjB,YAAY;IACZ,aAAa;IACb,aAAa;IACb,WAAW;IACX,gBAAgB;IAChB,QAAQ;IACR,UAAU;IACV,SAAS;IACT,UAAU;IACV,iBAAiB;IACjB,MAAM;IACN,UAAU;IAEV,oBAAoB;IACpB,WAAW;IACX,WAAW;IACX,SAAS;IACT,SAAS;IACT,aAAa;IACb,eAAe;IAChB,WAAW;IACX,6BAA6B;IAC5B,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,eAAe;IACf,gBAAgB;IAChB,cAAc;IACf,cAAc;IACb,iBAAiB;IACjB,iBAAiB;IACjB,WAAW;
IACX,gBAAgB;IAChB,cAAc;IACd,UAAU;IACV,WAAW;IACX,cAAc;IACd,UAAU;IACV,UAAU;IACV,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,WAAW;IACX,WAAW;IACX,gBAAgB;IACjB,WAAW;IACV,cAAc;IACd,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,SAAS;IACT,WAAW;IACZ,YAAY;IACX,eAAe;IACf,aAAa;IACb,aAAa;IACb,kBAAkB;IAClB,WAAW;IACX,SAAS;IACT,kBAAkB;IAClB,SAAS;IACV,YAAY;IACX,QAAQ;IACT,YAAY;IACX,YAAY;IACZ,kBAAkB;IAClB,eAAe,EAAE,SAAS;IAC1B,YAAY;IACZ,kBAAkB,EAAE,SAAS;IAC7B,eAAe;IACf,SAAS;IACT,SAAS;IACV,eAAe;IACd,aAAa;IACb,kBAAkB;IAClB,eAAe;IAEf,OAAO;IACP,YAAY;IACZ,UAAU;IACV,UAAU;IACX,mBAAmB;IAClB,mBAAmB;IACnB,OAAO;IACR,mBAAmB;IAClB,cAAc;IACd,aAAa;IACb,WAAW;IACX,WAAW;IACX,cAAc;IACd,SAAS;IACT,SAAS;IACT,QAAQ;IACT,UAAU;IACT,eAAe;IACf,gBAAgB;IACjB,sBAAsB;IACrB,iBAAiB;IACjB,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,cAAc;IACf,iBAAiB;IAChB,aAAa;IACb,WAAW;IACX,UAAU;IACV,YAAY;IACZ,aAAa;IACb,eAAe;IACf,SAAS;IAET,QAAQ;IACR,QAAQ;IAER,YAAY;IACZ,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACf,YAAY;CACX,CAAC;AAEF,wCAAwC;AAC3B,QAAA,0BAA0B,GAAG;IACzC,eAAe;IACf,eAAe;IACf,gBAAgB;IAChB,iBAAiB;IACjB,6BAA6B;IAC7B,6BAA6B;IAC7B,oBAAoB;IACpB,YAAY;IACZ,mBAAmB;IACnB,gBAAgB;IAChB,kBAAkB;IAClB,uBAAuB;IACvB,sBAAsB;IACtB,eAAe,EAAE,4BAA4B;IAC7C,eAAe,EAAE,qCAAqC;IACtD,sBAAsB;IACtB,qBAAqB;IACrB,mBAAmB,EAAE,WAAW;IAChC,kCAAkC,EAAE,WAAW;IAC/C,yBAAyB,EAAE,cAAc;IACzC,gBAAgB;IAChB,mBAAmB,EAAE,aAAa;IAClC,iBAAiB,EAAE,UAAU;CAC7B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEC,QAAA,uBAAuB,GAAG;IACtC,iBAAiB;IACjB,kBAAkB;IAClB,0BAA0B;IAC1B,2BAA2B;IAC3B,mBAAmB;IACnB,cAAc;IACd,eAAe;IACf,iCAAiC;IACjC,sBAAsB;IACtB,8BAA8B;IAC9B,+BAA+B;IAC/B,kCAAkC;IAClC,mBAAmB;IACnB,gBAAgB;IAChB,mDAAmD,EAAE,WAAW;IAChE,sBAAsB,CAAC,UAAU;CACjC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEZ,wCAAwC;AACxC,qDAAqD;AACxC,QAAA,sBAAsB,GAAG,IAAI,GAAG,CAAC;IAC7C,MAAM;IACN,OAAO;IACP,MAAM;IACN,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,OAAO;IACP,QAAQ;IACR,GAAG;IACH,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,QAAQ;IACR,OAAO;IACP,MAAM;IACN,SAAS;IACT,SAAS;IACT,SAAS;IACT,UAAU;IACV,MAAM;IACN,Q
AAQ;IACR,MAAM;IACN,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,OAAO;IACP,KAAK;IACL,OAAO;IACP,KAAK;CACL,CAAC,CAAC;AAEH,qBAAqB;AACR,QAAA,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACzC,KAAK;IACL,OAAO;IACP,iBAAiB;IACjB,YAAY;IACZ,SAAS;IACT,SAAS;IACT,UAAU;IACV,YAAY;IACZ,UAAU;IACV,aAAa;IACb,cAAc;IACd,WAAW;IACX,KAAK;IACL,SAAS;IACT,aAAa;IACb,SAAS;IACT,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,SAAS;IACT,KAAK;IACL,SAAS;IACT,QAAQ;IACR,OAAO;IACP,MAAM;IACN,OAAO;IAEP,oBAAoB;IACpB,QAAQ;IACR,aAAa;IACb,OAAO;IACP,aAAa;IACb,aAAa;IACb,eAAe;IACf,YAAY;IACZ,mBAAmB;IACnB,OAAO;IACP,cAAc;IACd,OAAO;IACP,OAAO;IACP,cAAc;IACd,eAAe;IACf,QAAQ;IACR,UAAU;IACV,aAAa;IACb,SAAS;IACT,SAAS;IACT,eAAe;IACf,UAAU;IACV,UAAU;IACV,UAAU;IACV,YAAY;IACZ,SAAS;IACT,QAAQ;IACR,aAAa;IACb,WAAW;IACX,UAAU;IACV,WAAW;IACX,SAAS;IACT,OAAO;CACP,CAAC,CAAC;AACU,QAAA,wBAAwB,GAAG,IAAI,GAAG,CAAC;IAC/C,OAAO;IACP,IAAI;CACJ,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":";;;AAAA,uBAAuB;AACvB,oEAAoE;AACvD,QAAA,oBAAoB,GAAG;IACnC,OAAO;IACP,eAAe;IACf,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,kBAAkB;IAClB,kBAAkB;IAClB,OAAO;IACP,gBAAgB;IAChB,SAAS;IACT,kBAAkB;IAClB,MAAM;IACN,eAAe;IACf,UAAU;IACV,MAAM,CAAC,kCAAkC;CACzC,CAAC;AAEW,QAAA,YAAY,GAAG,GAAG,CAAC;AACnB,QAAA,cAAc,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEnH,wCAAwC;AAC3B,QAAA,iBAAiB,GAAG,IAAI,GAAG,CAAC;IACxC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC1D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,QAAQ,EAAE,YAAY,EAAE,SAAS;IACjC,SAAS,EAAE,SAAS;IACpB,YAAY;IACZ,MAAM,EAAE,UAAU;CAClB,CAAC,CAAC;AAEH,+CAA+C;AAClC,QAAA,eAAe,GAAG,IAAI,GAAG,CAAC;IACtC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO;IACjE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM;IACxF,MAAM;CACN,CAAC,CAAC;AAEH,0BAA0B;AACb,QAAA,eAAe,GAAG;IAC9B,kBAAkB;IAClB,UAAU;IACV,6BAA6B;IAC7B,OAAO;IACP,MAAM;IACN,MAAM;IAEN,MAAM;IACN,8BAA8B;IAC9B,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,eAAe;IACf,mBAAmB;IACnB,mBAAmB;IACnB,QAAQ;IACR,QAAQ;IACR,eAAe,EAAE,SAAS;IAC1B,QAAQ;IAER,WAAW;IACX,mBAAmB;IACnB,kBAAkB;IAElB,eAAe;IACf,sBAAsB;IACtB,mBAAmB;IAEnB,cAAc;IACd,QAAQ;IACR,sBAAsB;IACtB,SAAS;IACT,SAAS;IACT,SAAS;IACT,SAAS;IACT,KAAK;IACL,aAAa;IACb,aAAa;IACb,6BAA6B;IAC7B,uBAAuB;IACvB,mBAAmB;IACnB,2BAA2B;IAC3B,yBAAyB;IACzB,OAAO;IACP,6BAA6B;IAC7B,UAAU;IACV,0BAA0B;IAC1B,WAAW;IAEX,WAAW;IACX,SAAS;IACT,SAAS;IACT,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,OAAO;IACP,OAAO;IACP,aAAa;IACb,aAAa;IACb,OAAO;IACP,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,WAAW;IACX,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,aAAa;IACb,2CAA2C;IAC3C,6CAA6C;IAC7C,iBAAiB;IACjB,kBAAkB;IAClB,yCAAyC;IACzC,oBAAoB;IACpB,oBAAoB;IACpB,mBAAmB;IACnB,gBAAgB;IAChB,eAAe;IACf,eAAe;IAC
f,uBAAuB;IACvB,mBAAmB;IACnB,wBAAwB;IACxB,wBAAwB;IACxB,iBAAiB;IACjB,iBAAiB;IAEjB,SAAS;IACT,QAAQ;IAER,0BAA0B;IAC1B,QAAQ;IACR,+BAA+B;IAC/B,QAAQ;IACP,mCAAmC;IACpC,QAAQ;IACR,MAAM;IACN,QAAQ;IACR,UAAU;IACV,MAAM;IACN,8BAA8B;IAC9B,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,kBAAkB;IAClB,iBAAiB;IACjB,UAAU;IACV,4BAA4B;IAC5B,qCAAqC;IAErC,SAAS;IACT,UAAU;IACV,2CAA2C;IAC3C,+CAA+C;IAC/C,8CAA8C;IAC9C,+BAA+B;IAC/B,8BAA8B;IAC9B,SAAS;IACT,YAAY;IAEZ,UAAU;IACV,kBAAkB;IAClB,6IAA6I;IAE7I,QAAQ;IACR,kBAAkB;IAClB,OAAO;IACP,OAAO;IAEP,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IAEZ,mBAAmB;IACnB,UAAU;IACV,8BAA8B;IAC9B,6BAA6B;IAE7B,uBAAuB;IACvB,6BAA6B;IAC7B,sDAAsD;IACtD,iCAAiC;IACjC,4CAA4C;IAE5C,aAAa;IACb,mCAAmC;IAEnC,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,WAAW;IACX,UAAU;IAEV,aAAa;IACb,4BAA4B;IAC5B,wBAAwB;IAExB,QAAQ;IACR,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,MAAM;IACN,OAAO;IACP,SAAS;IACT,eAAe,EAAE,MAAM;IACvB,gCAAgC;IAChC,oDAAoD,EAAE,iBAAiB;IACvE,eAAe;IACf,gDAAgD,EAAE,gBAAgB;IAClE,+BAA+B;IAC/B,mBAAmB,EAAE,SAAS;IAC9B,uCAAuC,EAAE,SAAS;CAClD,CAAC;AAEF,iDAAiD;AACpC,QAAA,eAAe,GAAG;IAC9B,OAAO;IACP,IAAI;IACJ,WAAW;IACX,aAAa;IACb,cAAc;IACd,SAAS;IACT,SAAS;CACT,CAAC;AAEF,mDAAmD;AACnD,4CAA4C;AAC/B,QAAA,iBAAiB,GAAG;IAChC,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,QAAQ;IACT,wBAAwB;IACvB,UAAU;IACV,SAAS;IACT,cAAc;IACd,eAAe;IACf,MAAM;IACN,eAAe;IACf,oBAAoB;IACpB,WAAW;IACX,UAAU;IACV,SAAS;IACT,WAAW;IACZ,cAAc;IACb,UAAU;IACV,gBAAgB;IAChB,aAAa;IACb,gBAAgB;IAChB,gBAAgB;IAChB,gBAAgB;IAChB,wBAAwB;IACxB,gBAAgB;IAChB,kBAAkB;IAClB,cAAc;IACd,kBAAkB;IAClB,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,eAAe;IACf,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IACjB,iBAAiB;IACjB,mBAAmB;IACnB,gBAAgB;IAChB,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,eAAe;IACf,eAAe;IACf,gBAAgB;IACjB,kBAAkB;IACjB,iBAAiB;IACjB,eAAe,EAAE,YAAY;IAC7B,aAAa;IACb,mBAAmB;IACnB,YAAY;IACb,kBAAkB;IAClB,aAAa;IACZ,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,aAAa;IACb,SAAS;IACT,iBAAiB;IAC
jB,aAAa;IACb,qBAAqB;IACrB,aAAa;IACb,QAAQ;IAER,aAAa;IACb,oBAAoB;IACpB,mBAAmB;IACpB,YAAY;IACX,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,WAAW;IACX,YAAY;IACZ,WAAW;IACX,mBAAmB;IACnB,gBAAgB;IAChB,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,UAAU;IACV,gBAAgB;IAChB,mBAAmB;IACnB,MAAM;IACN,MAAM;IACN,QAAQ;IAER,SAAS;IACT,WAAW;IACX,YAAY;IACZ,WAAW;IACZ,cAAc;IACb,mBAAmB;IACnB,oBAAoB;IACpB,YAAY;IACZ,UAAU;IACV,aAAa;IACb,aAAa;IACb,cAAc;IACd,cAAc,EAAE,gBAAgB;IAChC,aAAa;IACb,UAAU;IACX,kCAAkC;IACjC,YAAY;IACZ,gBAAgB;IAChB,aAAa;IACb,iBAAiB;IACjB,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,iBAAiB;IACjB,gBAAgB;IAChB,cAAc;IACd,eAAe;IACf,SAAS;IACT,UAAU;IACV,cAAc,EAAE,YAAY;IAC5B,gBAAgB;IAChB,cAAc;IACd,aAAa;IACb,gBAAgB,EAAE,UAAU;IAC5B,iBAAiB;IACjB,aAAa;IACb,aAAa;IACb,YAAY;IACZ,cAAc;IACd,mBAAmB;IACnB,aAAa;IACb,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,eAAe,EAAE,aAAa;IAC9B,oBAAoB;IAEpB,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACX,YAAY;IACX,YAAY;IACZ,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,UAAU,EAAE,eAAe;IAE3B,mBAAmB;IACnB,UAAU;IACV,aAAa;IACb,WAAW;IACX,mBAAmB;IACnB,cAAc;IACd,mBAAmB;IACnB,kBAAkB;IAClB,YAAY;IACb,gBAAgB;IACf,aAAa;IACb,eAAe;IACf,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,eAAe;IACf,iBAAiB;IACjB,0BAA0B,EAAE,iBAAiB;IAC7C,gBAAgB;IAChB,aAAa;IAEb,UAAU;IACV,WAAW;IACX,UAAU;IACV,kBAAkB;IAClB,cAAc;IACd,UAAU;IACV,YAAY;IACZ,qBAAqB;IACrB,WAAW;IACX,cAAc;IACf,YAAY;IACX,UAAU;IACV,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,SAAS;IACT,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IAEjB,eAAe;IACf,QAAQ;IACR,SAAS;IACT,WAAW;IACZ,YAAY;IACZ,YAAY;IACX,OAAO;IACP,YAAY;IAEZ,WAAW;IACX,aAAa;IACb,aAAa;IACb,gBAAgB,EAAE,YAAY;IAC/B,2BAA2B;IAC1B,WAAW;IACZ,aAAa;IACZ,gBAAgB;IAChB,YAAY;IACZ,qBAAqB;IACrB,cAAc;IACd,iBAAiB;IACjB,sBAAsB;IAEtB,UAAU;IACV,cAAc;IACd,sBAAsB;IACtB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,WAAW;IAEX,gBAAgB;IAChB,iBAAiB;IACjB,WAAW;IACX,YAAY;IACZ,UAAU;IACV,UAAU;IACV,oBAAoB;IAEpB,aAAa;IACb,cAAc;IACd,cAAc;IACf,4CAA4C;IAC3C,cAAc;IACd,QAAQ;IAER,SAAS,EAAE,QAAQ;IACnB,SAAS;IACT,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,SAAS,EAAE,YAAY;IACvB,UAAU;IACV,UAAU;IACV,gBAAgB;IAChB,aAA
a;IACb,UAAU;IACV,YAAY,EAAE,MAAM;IACpB,aAAa,EAAE,MAAM;IACrB,uBAAuB,EAAE,gBAAgB;IACzC,WAAW;IACX,UAAU;IACV,SAAS;IACT,UAAU;IACV,gBAAgB;IAChB,iBAAiB,EAAE,QAAQ;IAC3B,iBAAiB;IACjB,WAAW;IAEX,UAAU;IACV,WAAW;IACX,eAAe;IACf,OAAO;IACP,OAAO;IACR,+BAA+B;IAC9B,UAAU;IACV,YAAY;IACZ,SAAS;IACT,YAAY;IACZ,aAAa;IACb,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,UAAU;IACV,aAAa;IACb,cAAc;IACd,WAAW;IACX,aAAa;IACb,gBAAgB;IAChB,kBAAkB;IAClB,eAAe;IACf,cAAc;IAEd,MAAM;IACN,MAAM;IACP,YAAY;IACZ,gBAAgB;IACf,iBAAiB;IACjB,OAAO;IACP,aAAa;IACb,kBAAkB;IACnB,iCAAiC;IAChC,aAAa;IACb,kBAAkB;IAClB,qBAAqB;IACrB,iBAAiB;IACjB,mBAAmB;IACnB,kBAAkB;IAClB,kBAAkB;IAClB,mBAAmB;IACnB,WAAW;IACX,cAAc;IACd,UAAU;IACV,SAAS;IAET,gBAAgB;IAChB,sBAAsB,EAAE,eAAe;IACvC,aAAa;IACb,cAAc;IACf,aAAa;IAEZ,UAAU;IACV,aAAa;IACb,YAAY;IACZ,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,MAAM;IACN,SAAS;IACV,iBAAiB;IAChB,aAAa;IACd,aAAa;IACZ,aAAa;IACb,QAAQ;IACR,aAAa;IACb,aAAa;IACb,gBAAgB;IAChB,aAAa;IACb,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,kBAAkB;IAClB,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,UAAU;IACV,WAAW;IACX,UAAU;IACV,iBAAiB;IACjB,UAAU;IACV,cAAc;IACd,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,UAAU;IACV,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,WAAW;IACX,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,iBAAiB;IAClB,sCAAsC;IACrC,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,kBAAkB;IAClB,SAAS;IACV,WAAW;IACV,eAAe;IACf,WAAW;IACX,WAAW;IACX,SAAS;IACT,UAAU;IACV,UAAU;IACV,cAAc;IACd,cAAc;IACd,kBAAkB;IAClB,iBAAiB,EAAE,SAAS;IAE5B,SAAS;IACT,SAAS;IACT,UAAU;IAEV,OAAO;IACP,gBAAgB;IAChB,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,WAAW;IACX,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,iBAAiB;IACjB,YAAY;IACZ,aAAa;IACb,aAAa;IACb,WAAW;IACX,gBAAgB;IAChB,QAAQ;IACR,UAAU;IACV,SAAS;IACT,UAAU;IACV,iBAAiB;IACjB,kBAAkB;IAClB,MAAM;IACN,UAAU;IAEV,oBAAoB;IACpB,WAAW;IACX,WAAW;IACX,SAAS;IACT,SAAS;IACT,aAAa;IACb,eAAe;IAChB,WAAW;IACX,6BAA6B;IAC5B,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,aA
Aa;IACb,eAAe;IACf,gBAAgB;IAChB,cAAc;IACf,cAAc;IACb,iBAAiB;IACjB,iBAAiB;IACjB,WAAW;IACX,gBAAgB;IAChB,cAAc;IACd,UAAU;IACV,WAAW;IACX,cAAc;IACd,UAAU;IACV,UAAU;IACV,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,WAAW;IACX,WAAW;IACX,gBAAgB;IACjB,WAAW;IACV,cAAc;IACd,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,SAAS;IACT,WAAW;IACZ,YAAY;IACX,eAAe;IACf,aAAa;IACb,aAAa;IACb,kBAAkB;IAClB,WAAW;IACX,SAAS;IACT,kBAAkB;IAClB,SAAS;IACV,YAAY;IACX,QAAQ;IACT,YAAY;IACX,YAAY;IACZ,kBAAkB;IAClB,eAAe,EAAE,SAAS;IAC1B,YAAY;IACZ,kBAAkB,EAAE,SAAS;IAC7B,eAAe;IACf,SAAS;IACT,SAAS;IACV,eAAe;IACd,aAAa;IACb,kBAAkB;IAClB,eAAe;IAEf,OAAO;IACP,YAAY;IACZ,UAAU;IACV,UAAU;IACX,mBAAmB;IAClB,mBAAmB;IACnB,OAAO;IACR,mBAAmB;IAClB,cAAc;IACd,aAAa;IACb,WAAW;IACX,WAAW;IACX,cAAc;IACd,SAAS;IACT,SAAS;IACT,QAAQ;IACT,UAAU;IACT,eAAe;IACf,gBAAgB;IACjB,sBAAsB;IACrB,iBAAiB;IACjB,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,cAAc;IACf,iBAAiB;IAChB,aAAa;IACb,WAAW;IACX,UAAU;IACV,YAAY;IACZ,aAAa;IACb,eAAe;IACf,SAAS;IAET,QAAQ;IACR,QAAQ;IAER,YAAY;IACZ,eAAe;IACf,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACf,YAAY;CACX,CAAC;AAEF,wCAAwC;AAC3B,QAAA,0BAA0B,GAAG;IACzC,eAAe;IACf,eAAe;IACf,gBAAgB;IAChB,iBAAiB;IACjB,6BAA6B;IAC7B,6BAA6B;IAC7B,oBAAoB;IACpB,YAAY;IACZ,mBAAmB;IACnB,gBAAgB;IAChB,kBAAkB;IAClB,uBAAuB;IACvB,sBAAsB;IACtB,eAAe,EAAE,4BAA4B;IAC7C,eAAe,EAAE,qCAAqC;IACtD,sBAAsB;IACtB,qBAAqB;IACrB,mBAAmB,EAAE,WAAW;IAChC,kCAAkC,EAAE,WAAW;IAC/C,yBAAyB,EAAE,cAAc;IACzC,gBAAgB;IAChB,mBAAmB,EAAE,aAAa;IAClC,iBAAiB,EAAE,UAAU;CAC7B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEC,QAAA,uBAAuB,GAAG;IACtC,iBAAiB;IACjB,kBAAkB;IAClB,0BAA0B;IAC1B,2BAA2B;IAC3B,mBAAmB;IACnB,cAAc;IACd,eAAe;IACf,iCAAiC;IACjC,sBAAsB;IACtB,8BAA8B;IAC9B,+BAA+B;IAC/B,kCAAkC;IAClC,mBAAmB;IACnB,gBAAgB;IAChB,mDAAmD,EAAE,WAAW;IAChE,sBAAsB,CAAC,UAAU;CACjC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEZ,wCAAwC;AACxC,qDAAqD;AACxC,QAAA,sBAAsB,GAAG,IAAI,GAAG,CAAC;IAC7C,MAAM;IACN,OAAO;IACP,MAAM;IACN,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,OAAO;IACP,QAAQ;IACR,GAAG;IACH,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN
,MAAM;IACN,QAAQ;IACR,OAAO;IACP,MAAM;IACN,SAAS;IACT,SAAS;IACT,SAAS;IACT,UAAU;IACV,MAAM;IACN,QAAQ;IACR,MAAM;IACN,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,OAAO;IACP,KAAK;IACL,OAAO;IACP,KAAK;CACL,CAAC,CAAC;AAEH,qBAAqB;AACR,QAAA,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACzC,KAAK;IACL,OAAO;IACP,iBAAiB;IACjB,YAAY;IACZ,SAAS;IACT,SAAS;IACT,UAAU;IACV,YAAY;IACZ,UAAU;IACV,aAAa;IACb,cAAc;IACd,WAAW;IACX,KAAK;IACL,SAAS;IACT,aAAa;IACb,SAAS;IACT,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,SAAS;IACT,KAAK;IACL,SAAS;IACT,QAAQ;IACR,OAAO;IACP,MAAM;IACN,OAAO;IAEP,oBAAoB;IACpB,QAAQ;IACR,aAAa;IACb,OAAO;IACP,aAAa;IACb,aAAa;IACb,eAAe;IACf,YAAY;IACZ,mBAAmB;IACnB,OAAO;IACP,cAAc;IACd,OAAO;IACP,OAAO;IACP,cAAc;IACd,eAAe;IACf,QAAQ;IACR,UAAU;IACV,aAAa;IACb,SAAS;IACT,SAAS;IACT,eAAe;IACf,UAAU;IACV,UAAU;IACV,UAAU;IACV,YAAY;IACZ,SAAS;IACT,QAAQ;IACR,aAAa;IACb,WAAW;IACX,UAAU;IACV,WAAW;IACX,SAAS;IACT,OAAO;CACP,CAAC,CAAC;AACU,QAAA,wBAAwB,GAAG,IAAI,GAAG,CAAC;IAC/C,OAAO;IACP,IAAI;CACJ,CAAC,CAAC"}
|
package/dist/defuddle.d.ts
CHANGED
|
@@ -61,6 +61,17 @@ export declare class Defuddle {
|
|
|
61
61
|
* Resolve relative URLs to absolute within a DOM element
|
|
62
62
|
*/
|
|
63
63
|
private resolveRelativeUrls;
|
|
64
|
+
/**
|
|
65
|
+
* Flatten shadow DOM content into a cloned document.
|
|
66
|
+
* Walks both trees in parallel so positional correspondence is exact.
|
|
67
|
+
*/
|
|
68
|
+
private flattenShadowRoots;
|
|
69
|
+
/**
|
|
70
|
+
* Replace a shadow DOM host element with a div containing its shadow content.
|
|
71
|
+
* Custom elements (tag names with hyphens) would re-initialize when inserted
|
|
72
|
+
* into a live DOM, recreating their shadow roots and hiding the content.
|
|
73
|
+
*/
|
|
74
|
+
private replaceShadowHost;
|
|
64
75
|
/**
|
|
65
76
|
* Resolve relative URLs in an HTML string
|
|
66
77
|
*/
|
package/dist/defuddle.js
CHANGED
|
@@ -41,6 +41,20 @@ class Defuddle {
|
|
|
41
41
|
result = retryResult;
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
+
// If still very little content, the page may be an index/listing page
|
|
45
|
+
// where card elements were scored as non-content or removed by partial
|
|
46
|
+
// selectors (e.g. "post-preview"). Retry with both disabled.
|
|
47
|
+
if (result.wordCount < 50) {
|
|
48
|
+
this._log('Still very little content, retrying without scoring/partial selectors (possible index page)');
|
|
49
|
+
const indexRetry = this.parseInternal({
|
|
50
|
+
removeLowScoring: false,
|
|
51
|
+
removePartialSelectors: false
|
|
52
|
+
});
|
|
53
|
+
if (indexRetry.wordCount > result.wordCount) {
|
|
54
|
+
this._log('Index page retry produced more content');
|
|
55
|
+
result = indexRetry;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
44
58
|
// Strip dangerous elements from this.doc before any fallback paths
|
|
45
59
|
// that read from it (e.g. _findContentBySchemaText).
|
|
46
60
|
// This must happen after parseInternal, which needs script tags
|
|
@@ -280,9 +294,14 @@ class Defuddle {
|
|
|
280
294
|
const options = {
|
|
281
295
|
removeExactSelectors: true,
|
|
282
296
|
removePartialSelectors: true,
|
|
297
|
+
removeHiddenElements: true,
|
|
298
|
+
removeLowScoring: true,
|
|
299
|
+
removeSmallImages: true,
|
|
300
|
+
standardize: true,
|
|
283
301
|
...this.options,
|
|
284
302
|
...overrideOptions
|
|
285
303
|
};
|
|
304
|
+
const debugRemovals = [];
|
|
286
305
|
// Extract schema.org data
|
|
287
306
|
const schemaOrgData = this._extractSchemaOrgData(this.doc);
|
|
288
307
|
const pageMetaTags = this._collectMetaTags();
|
|
@@ -324,10 +343,19 @@ class Defuddle {
|
|
|
324
343
|
const smallImages = this.findSmallImages(this.doc);
|
|
325
344
|
// Clone document
|
|
326
345
|
const clone = this.doc.cloneNode(true);
|
|
346
|
+
// Flatten shadow DOM content into the clone
|
|
347
|
+
this.flattenShadowRoots(this.doc, clone);
|
|
327
348
|
// Apply mobile styles to clone
|
|
328
349
|
this.applyMobileStyles(clone, mobileStyles);
|
|
329
350
|
// Find main content
|
|
330
|
-
|
|
351
|
+
let mainContent = null;
|
|
352
|
+
if (options.contentSelector) {
|
|
353
|
+
mainContent = clone.querySelector(options.contentSelector);
|
|
354
|
+
this._log('Using contentSelector:', options.contentSelector, mainContent ? 'found' : 'not found');
|
|
355
|
+
}
|
|
356
|
+
if (!mainContent) {
|
|
357
|
+
mainContent = this.findMainContent(clone);
|
|
358
|
+
}
|
|
331
359
|
if (!mainContent) {
|
|
332
360
|
const fallbackContent = this.resolveContentUrls((0, dom_1.serializeHTML)(this.doc.body));
|
|
333
361
|
const endTime = Date.now();
|
|
@@ -340,31 +368,48 @@ class Defuddle {
|
|
|
340
368
|
};
|
|
341
369
|
}
|
|
342
370
|
// Standardize footnotes before cleanup (CSS sidenotes use display:none)
|
|
343
|
-
(
|
|
371
|
+
if (options.standardize) {
|
|
372
|
+
(0, footnotes_1.standardizeFootnotes)(mainContent);
|
|
373
|
+
}
|
|
344
374
|
// Remove small images
|
|
345
|
-
|
|
375
|
+
if (options.removeSmallImages) {
|
|
376
|
+
this.removeSmallImages(clone, smallImages);
|
|
377
|
+
}
|
|
346
378
|
// Remove hidden elements using computed styles
|
|
347
|
-
|
|
379
|
+
if (options.removeHiddenElements) {
|
|
380
|
+
this.removeHiddenElements(clone, debugRemovals);
|
|
381
|
+
}
|
|
348
382
|
// Remove non-content blocks by scoring
|
|
349
383
|
// Tries to find lists, navigation based on text content and link density
|
|
350
|
-
|
|
384
|
+
if (options.removeLowScoring) {
|
|
385
|
+
scoring_1.ContentScorer.scoreAndRemove(clone, this.debug, debugRemovals);
|
|
386
|
+
}
|
|
351
387
|
// Remove clutter using selectors
|
|
352
388
|
if (options.removeExactSelectors || options.removePartialSelectors) {
|
|
353
|
-
this.removeBySelector(clone, options.removeExactSelectors, options.removePartialSelectors, mainContent);
|
|
389
|
+
this.removeBySelector(clone, options.removeExactSelectors, options.removePartialSelectors, mainContent, debugRemovals);
|
|
354
390
|
}
|
|
355
391
|
// Normalize the main content
|
|
356
|
-
(
|
|
392
|
+
if (options.standardize) {
|
|
393
|
+
(0, standardize_1.standardizeContent)(mainContent, metadata, this.doc, this.debug);
|
|
394
|
+
}
|
|
357
395
|
// Resolve relative URLs to absolute
|
|
358
396
|
this.resolveRelativeUrls(mainContent);
|
|
359
397
|
const content = mainContent.outerHTML;
|
|
360
398
|
const endTime = Date.now();
|
|
361
|
-
|
|
399
|
+
const result = {
|
|
362
400
|
content,
|
|
363
401
|
...metadata,
|
|
364
402
|
wordCount: this.countWords(content),
|
|
365
403
|
parseTime: Math.round(endTime - startTime),
|
|
366
404
|
metaTags: pageMetaTags
|
|
367
405
|
};
|
|
406
|
+
if (this.debug) {
|
|
407
|
+
result.debug = {
|
|
408
|
+
contentSelector: this.getElementSelector(mainContent),
|
|
409
|
+
removals: debugRemovals
|
|
410
|
+
};
|
|
411
|
+
}
|
|
412
|
+
return result;
|
|
368
413
|
}
|
|
369
414
|
catch (error) {
|
|
370
415
|
console.error('Defuddle', 'Error processing document:', error);
|
|
@@ -487,11 +532,12 @@ class Defuddle {
|
|
|
487
532
|
image.remove();
|
|
488
533
|
});
|
|
489
534
|
}
|
|
490
|
-
removeHiddenElements(doc) {
|
|
535
|
+
removeHiddenElements(doc, debugRemovals) {
|
|
491
536
|
let count = 0;
|
|
492
|
-
const elementsToRemove = new
|
|
493
|
-
//
|
|
494
|
-
|
|
537
|
+
const elementsToRemove = new Map();
|
|
538
|
+
// Use querySelectorAll instead of getElementsByTagName because
|
|
539
|
+
// linkedom's cloneNode does not wire up live HTMLCollections.
|
|
540
|
+
const allElements = Array.from(doc.querySelectorAll('*'));
|
|
495
541
|
// Process styles in batches to minimize layout thrashing
|
|
496
542
|
const BATCH_SIZE = 100;
|
|
497
543
|
for (let i = 0; i < allElements.length; i += BATCH_SIZE) {
|
|
@@ -518,30 +564,66 @@ class Defuddle {
|
|
|
518
564
|
// Write phase - mark elements for removal
|
|
519
565
|
batch.forEach((element, index) => {
|
|
520
566
|
const computedStyle = styles[index];
|
|
521
|
-
if (computedStyle
|
|
522
|
-
|
|
523
|
-
computedStyle.
|
|
524
|
-
|
|
525
|
-
|
|
567
|
+
if (computedStyle) {
|
|
568
|
+
let reason = '';
|
|
569
|
+
if (computedStyle.display === 'none')
|
|
570
|
+
reason = 'display:none';
|
|
571
|
+
else if (computedStyle.visibility === 'hidden')
|
|
572
|
+
reason = 'visibility:hidden';
|
|
573
|
+
else if (computedStyle.opacity === '0')
|
|
574
|
+
reason = 'opacity:0';
|
|
575
|
+
if (reason) {
|
|
576
|
+
elementsToRemove.set(element, reason);
|
|
577
|
+
count++;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
// Detect CSS framework hidden utilities (e.g. Tailwind's "hidden",
|
|
581
|
+
// "sm:hidden", "not-machine:hidden") which JSDOM/linkedom can't
|
|
582
|
+
// resolve through computed styles.
|
|
583
|
+
if (!elementsToRemove.has(element)) {
|
|
584
|
+
const className = element.getAttribute('class') || '';
|
|
585
|
+
if (className) {
|
|
586
|
+
const tokens = className.split(/\s+/);
|
|
587
|
+
for (const token of tokens) {
|
|
588
|
+
if (token === 'hidden' || token.endsWith(':hidden')) {
|
|
589
|
+
elementsToRemove.set(element, `class:${token}`);
|
|
590
|
+
count++;
|
|
591
|
+
break;
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
}
|
|
526
595
|
}
|
|
527
596
|
});
|
|
528
597
|
}
|
|
529
598
|
// Batch remove all hidden elements
|
|
530
|
-
elementsToRemove.forEach(el =>
|
|
599
|
+
elementsToRemove.forEach((reason, el) => {
|
|
600
|
+
if (this.debug && debugRemovals) {
|
|
601
|
+
debugRemovals.push({
|
|
602
|
+
step: 'removeHiddenElements',
|
|
603
|
+
reason,
|
|
604
|
+
text: (0, utils_1.textPreview)(el)
|
|
605
|
+
});
|
|
606
|
+
}
|
|
607
|
+
el.remove();
|
|
608
|
+
});
|
|
531
609
|
this._log('Removed hidden elements:', count);
|
|
532
610
|
}
|
|
533
|
-
removeBySelector(doc, removeExact = true, removePartial = true, mainContent) {
|
|
611
|
+
removeBySelector(doc, removeExact = true, removePartial = true, mainContent, debugRemovals) {
|
|
534
612
|
const startTime = Date.now();
|
|
535
613
|
let exactSelectorCount = 0;
|
|
536
614
|
let partialSelectorCount = 0;
|
|
537
|
-
// Track all elements to be removed
|
|
538
|
-
const elementsToRemove = new
|
|
615
|
+
// Track all elements to be removed, with their match type
|
|
616
|
+
const elementsToRemove = new Map();
|
|
539
617
|
// First collect elements matching exact selectors
|
|
540
618
|
if (removeExact) {
|
|
541
619
|
const exactElements = doc.querySelectorAll(constants_1.EXACT_SELECTORS.join(','));
|
|
542
620
|
exactElements.forEach(el => {
|
|
543
621
|
if (el?.parentNode) {
|
|
544
|
-
|
|
622
|
+
// Skip elements inside code blocks (e.g. syntax highlighting spans)
|
|
623
|
+
if (el.closest('pre, code')) {
|
|
624
|
+
return;
|
|
625
|
+
}
|
|
626
|
+
elementsToRemove.set(el, { type: 'exact' });
|
|
545
627
|
exactSelectorCount++;
|
|
546
628
|
}
|
|
547
629
|
});
|
|
@@ -550,6 +632,10 @@ class Defuddle {
|
|
|
550
632
|
// Pre-compile regexes and combine into a single regex for better performance
|
|
551
633
|
const combinedPattern = constants_1.PARTIAL_SELECTORS.join('|');
|
|
552
634
|
const partialRegex = new RegExp(combinedPattern, 'i');
|
|
635
|
+
// Pre-compile individual regexes for debug pattern identification
|
|
636
|
+
const individualRegexes = this.debug
|
|
637
|
+
? constants_1.PARTIAL_SELECTORS.map(p => ({ pattern: p, regex: new RegExp(p, 'i') }))
|
|
638
|
+
: null;
|
|
553
639
|
// Create an efficient attribute selector for elements we care about
|
|
554
640
|
const attributeSelector = constants_1.TEST_ATTRIBUTES.map(attr => `[${attr}]`).join(',');
|
|
555
641
|
const allElements = doc.querySelectorAll(attributeSelector);
|
|
@@ -581,7 +667,10 @@ class Defuddle {
|
|
|
581
667
|
}
|
|
582
668
|
// Check for partial match using single regex test
|
|
583
669
|
if (partialRegex.test(attrs)) {
|
|
584
|
-
|
|
670
|
+
const matchedPattern = individualRegexes
|
|
671
|
+
? individualRegexes.find(r => r.regex.test(attrs))?.pattern
|
|
672
|
+
: undefined;
|
|
673
|
+
elementsToRemove.set(el, { type: 'partial', selector: matchedPattern });
|
|
585
674
|
partialSelectorCount++;
|
|
586
675
|
}
|
|
587
676
|
});
|
|
@@ -590,7 +679,7 @@ class Defuddle {
|
|
|
590
679
|
// Skip elements that are ancestors of mainContent to avoid disconnecting it
|
|
591
680
|
// Skip footnote list containers, their parents, and immediate children
|
|
592
681
|
// Skip anchor links inside headings - the heading transform handles these
|
|
593
|
-
elementsToRemove.forEach(el => {
|
|
682
|
+
elementsToRemove.forEach(({ type, selector }, el) => {
|
|
594
683
|
if (mainContent && el.contains(mainContent)) {
|
|
595
684
|
return;
|
|
596
685
|
}
|
|
@@ -608,6 +697,14 @@ class Defuddle {
|
|
|
608
697
|
}
|
|
609
698
|
}
|
|
610
699
|
catch (e) { }
|
|
700
|
+
if (this.debug && debugRemovals) {
|
|
701
|
+
debugRemovals.push({
|
|
702
|
+
step: 'removeBySelector',
|
|
703
|
+
selector: type === 'exact' ? 'exact' : selector,
|
|
704
|
+
reason: type === 'exact' ? 'exact selector match' : `partial match: ${selector}`,
|
|
705
|
+
text: (0, utils_1.textPreview)(el)
|
|
706
|
+
});
|
|
707
|
+
}
|
|
611
708
|
el.remove();
|
|
612
709
|
});
|
|
613
710
|
const endTime = Date.now();
|
|
@@ -805,12 +902,29 @@ class Defuddle {
|
|
|
805
902
|
// just because sibling noise inflates the parent's content score.
|
|
806
903
|
// Only prefer the child if it has meaningful content (>50 words),
|
|
807
904
|
// otherwise it may be an empty card element (e.g. related article cards).
|
|
905
|
+
// Skip this when the parent contains multiple children matching the
|
|
906
|
+
// same selector — that indicates a listing/portfolio page where the
|
|
907
|
+
// parent is the real content container.
|
|
808
908
|
const top = candidates[0];
|
|
809
909
|
let best = top;
|
|
810
910
|
for (let i = 1; i < candidates.length; i++) {
|
|
811
911
|
const child = candidates[i];
|
|
812
912
|
const childWords = (child.element.textContent || '').split(/\s+/).length;
|
|
813
913
|
if (child.selectorIndex < best.selectorIndex && best.element.contains(child.element) && childWords > 50) {
|
|
914
|
+
// Count how many candidates share this selector index inside
|
|
915
|
+
// the top element. Use top (not best) as the stable reference
|
|
916
|
+
// so the check isn't affected by earlier iterations.
|
|
917
|
+
let siblingsAtIndex = 0;
|
|
918
|
+
for (const c of candidates) {
|
|
919
|
+
if (c.selectorIndex === child.selectorIndex && top.element.contains(c.element)) {
|
|
920
|
+
if (++siblingsAtIndex > 1)
|
|
921
|
+
break;
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
if (siblingsAtIndex > 1) {
|
|
925
|
+
// Multiple articles/cards inside the parent — it's a listing page
|
|
926
|
+
continue;
|
|
927
|
+
}
|
|
814
928
|
best = child;
|
|
815
929
|
}
|
|
816
930
|
}
|
|
@@ -931,6 +1045,75 @@ class Defuddle {
|
|
|
931
1045
|
el.setAttribute('poster', resolve(poster));
|
|
932
1046
|
});
|
|
933
1047
|
}
|
|
1048
|
+
/**
|
|
1049
|
+
* Flatten shadow DOM content into a cloned document.
|
|
1050
|
+
* Walks both trees in parallel so positional correspondence is exact.
|
|
1051
|
+
*/
|
|
1052
|
+
flattenShadowRoots(original, clone) {
|
|
1053
|
+
const origElements = Array.from(original.body.getElementsByTagName('*'));
|
|
1054
|
+
// Find the first element with a shadow root (also serves as the hasShadowRoots check)
|
|
1055
|
+
const firstShadow = origElements.find(el => el.shadowRoot);
|
|
1056
|
+
if (!firstShadow)
|
|
1057
|
+
return;
|
|
1058
|
+
const cloneElements = Array.from(clone.body.getElementsByTagName('*'));
|
|
1059
|
+
// Check if we can directly read shadow DOM content (main world / Node.js).
|
|
1060
|
+
// In content script isolated worlds, shadowRoot exists but content is empty.
|
|
1061
|
+
const canReadShadow = (firstShadow.shadowRoot?.childNodes?.length ?? 0) > 0;
|
|
1062
|
+
if (canReadShadow) {
|
|
1063
|
+
// Direct traversal works (main world / Node.js)
|
|
1064
|
+
for (let i = origElements.length - 1; i >= 0; i--) {
|
|
1065
|
+
const origEl = origElements[i];
|
|
1066
|
+
if (!origEl.shadowRoot)
|
|
1067
|
+
continue;
|
|
1068
|
+
const cloneEl = cloneElements[i];
|
|
1069
|
+
if (!cloneEl)
|
|
1070
|
+
continue;
|
|
1071
|
+
const shadowHtml = origEl.shadowRoot.innerHTML;
|
|
1072
|
+
if (shadowHtml.length > 0) {
|
|
1073
|
+
this.replaceShadowHost(cloneEl, shadowHtml, clone);
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
else {
|
|
1078
|
+
// Content script isolated world — read data-defuddle-shadow attributes
|
|
1079
|
+
// stamped by an external main-world script.
|
|
1080
|
+
const shadowData = [];
|
|
1081
|
+
for (let i = 0; i < origElements.length; i++) {
|
|
1082
|
+
const origEl = origElements[i];
|
|
1083
|
+
const shadowHtml = origEl.getAttribute('data-defuddle-shadow');
|
|
1084
|
+
if (!shadowHtml)
|
|
1085
|
+
continue;
|
|
1086
|
+
const cloneEl = cloneElements[i];
|
|
1087
|
+
if (!cloneEl)
|
|
1088
|
+
continue;
|
|
1089
|
+
shadowData.push({ cloneEl, html: shadowHtml });
|
|
1090
|
+
// Clean up temporary attributes from both original and clone
|
|
1091
|
+
origEl.removeAttribute('data-defuddle-shadow');
|
|
1092
|
+
cloneEl.removeAttribute('data-defuddle-shadow');
|
|
1093
|
+
}
|
|
1094
|
+
for (const { cloneEl, html } of shadowData) {
|
|
1095
|
+
this.replaceShadowHost(cloneEl, html, clone);
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
/**
|
|
1100
|
+
* Replace a shadow DOM host element with a div containing its shadow content.
|
|
1101
|
+
* Custom elements (tag names with hyphens) would re-initialize when inserted
|
|
1102
|
+
* into a live DOM, recreating their shadow roots and hiding the content.
|
|
1103
|
+
*/
|
|
1104
|
+
replaceShadowHost(el, shadowHtml, doc) {
|
|
1105
|
+
const fragment = (0, dom_1.parseHTML)(doc, shadowHtml);
|
|
1106
|
+
if (el.tagName.includes('-')) {
|
|
1107
|
+
// Custom element — replace with a div to prevent re-initialization
|
|
1108
|
+
const div = doc.createElement('div');
|
|
1109
|
+
div.appendChild(fragment);
|
|
1110
|
+
el.parentNode?.replaceChild(div, el);
|
|
1111
|
+
}
|
|
1112
|
+
else {
|
|
1113
|
+
el.textContent = '';
|
|
1114
|
+
el.appendChild(fragment);
|
|
1115
|
+
}
|
|
1116
|
+
}
|
|
934
1117
|
/**
|
|
935
1118
|
* Resolve relative URLs in an HTML string
|
|
936
1119
|
*/
|