defuddle 0.2.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -2
- package/dist/constants.d.ts +14 -0
- package/dist/defuddle.d.ts +2 -0
- package/dist/index.js +1 -1
- package/dist/math.d.ts +14 -0
- package/dist/metadata.d.ts +1 -0
- package/package.json +7 -2
package/README.md
CHANGED
|
@@ -34,6 +34,19 @@ console.log(article.content); // HTML string of the main content
|
|
|
34
34
|
console.log(article.title); // Title of the article
|
|
35
35
|
```
|
|
36
36
|
|
|
37
|
+
### Debug mode
|
|
38
|
+
|
|
39
|
+
You can enable debug mode by passing an options object when creating a new Defuddle instance:
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
const article = new Defuddle(document, { debug: true }).parse();
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
- Logs more verbose output to the console about the parsing process
|
|
46
|
+
- Preserves HTML class and id attributes that are normally stripped
|
|
47
|
+
- Retains all data-* attributes
|
|
48
|
+
- Skips div flattening to preserve document structure
|
|
49
|
+
|
|
37
50
|
### Server-side usage
|
|
38
51
|
|
|
39
52
|
When using Defuddle in a Node.js environment, you can use JSDOM to create a DOM document:
|
|
@@ -78,11 +91,13 @@ Defuddle attempts to standardize HTML elements to provide a consistent input for
|
|
|
78
91
|
|
|
79
92
|
### Headings
|
|
80
93
|
|
|
81
|
-
|
|
94
|
+
- The first H1 or H2 heading is removed if it matches the title.
|
|
95
|
+
- H1s are converted to H2s.
|
|
96
|
+
- Anchor links in H1 to H6 elements are removed, leaving plain headings.
|
|
82
97
|
|
|
83
98
|
### Code blocks
|
|
84
99
|
|
|
85
|
-
Code block are standardized
|
|
100
|
+
Code blocks are standardized. If present, line numbers and syntax highlighting are removed, but the language is retained and added as a data attribute and class.
|
|
86
101
|
|
|
87
102
|
```html
|
|
88
103
|
<pre>
|
|
@@ -110,6 +125,18 @@ Inline reference<sup id="fnref:1"><a href="#fn:1">1</a></sup>.
|
|
|
110
125
|
</div>
|
|
111
126
|
```
|
|
112
127
|
|
|
128
|
+
### Math
|
|
129
|
+
|
|
130
|
+
Math elements, including MathJax and KaTeX, are converted to standard MathML:
|
|
131
|
+
|
|
132
|
+
```html
|
|
133
|
+
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline" data-latex="a \neq 0">
|
|
134
|
+
<mi>a</mi>
|
|
135
|
+
<mo>≠</mo>
|
|
136
|
+
<mn>0</mn>
|
|
137
|
+
</math>
|
|
138
|
+
```
|
|
139
|
+
|
|
113
140
|
## Development
|
|
114
141
|
|
|
115
142
|
### Build
|
|
package/dist/constants.d.ts
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export declare const ENTRY_POINT_ELEMENTS: string[];
|
|
2
|
+
export declare const MOBILE_WIDTH = 600;
|
|
3
|
+
export declare const BLOCK_ELEMENTS: string[];
|
|
4
|
+
export declare const PRESERVE_ELEMENTS: Set<string>;
|
|
5
|
+
export declare const INLINE_ELEMENTS: Set<string>;
|
|
6
|
+
export declare const HIDDEN_ELEMENT_SELECTORS: string;
|
|
7
|
+
export declare const EXACT_SELECTORS: string[];
|
|
8
|
+
export declare const PARTIAL_SELECTORS: string[];
|
|
9
|
+
export declare const FOOTNOTE_INLINE_REFERENCES: string;
|
|
10
|
+
export declare const FOOTNOTE_LIST_SELECTORS: string;
|
|
11
|
+
export declare const ALLOWED_EMPTY_ELEMENTS: Set<string>;
|
|
12
|
+
export declare const ALLOWED_ATTRIBUTES: Set<string>;
|
|
13
|
+
export declare const ALLOWED_ATTRIBUTES_DEBUG: Set<string>;
|
|
14
|
+
export declare const SUPPORTED_LANGUAGES: Set<string>;
|
package/dist/defuddle.d.ts
CHANGED
|
@@ -19,12 +19,14 @@ export declare class Defuddle {
|
|
|
19
19
|
private applyMobileStyles;
|
|
20
20
|
private removeHiddenElements;
|
|
21
21
|
private removeClutter;
|
|
22
|
+
private flattenDivs;
|
|
22
23
|
private cleanContent;
|
|
23
24
|
private removeTrailingHeadings;
|
|
24
25
|
private handleHeadings;
|
|
25
26
|
private removeHtmlComments;
|
|
26
27
|
private stripUnwantedAttributes;
|
|
27
28
|
private removeEmptyElements;
|
|
29
|
+
private removeEmptyLines;
|
|
28
30
|
private createFootnoteItem;
|
|
29
31
|
private collectFootnotes;
|
|
30
32
|
private findOuterFootnoteContainer;
|