umberto 8.4.0 → 9.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +21 -16
  2. package/package.json +8 -7
  3. package/scripts/filter/after-post-render/fix-code-samples.js +82 -18
  4. package/scripts/filter/after-post-render/gloria.js +27 -0
  5. package/scripts/filter/after-post-render/insert-error-codes.js +34 -26
  6. package/scripts/filter/after-post-render/validate-after-render.js +27 -6
  7. package/scripts/filter/after-render/process-svg.js +21 -0
  8. package/scripts/filter/before-post-render/gloria/render-post-render-pug-components.js +46 -18
  9. package/scripts/helper/u-extract-and-cache-title.js +27 -8
  10. package/scripts/helper/u-split-to-title-and-content.js +32 -8
  11. package/scripts/utils/gloria-after-post-render/append-copy-heading-buttons.js +119 -0
  12. package/scripts/utils/gloria-after-post-render/apply-design-doc-classes.js +157 -0
  13. package/scripts/utils/gloria-after-post-render/wrap-table-into-wrappers.js +25 -0
  14. package/scripts/utils/inline-svg.js +63 -94
  15. package/scripts/utils/spritesheet-svg.js +82 -102
  16. package/scripts/utils/toc.js +85 -31
  17. package/src/api-builder/api-builder.js +53 -40
  18. package/src/api-builder/build-page-worker.js +35 -0
  19. package/src/api-builder/classes/description-parser.js +77 -38
  20. package/src/data-converter/converters/jsduck2umberto.js +43 -15
  21. package/src/hexo/filter/project-locals.js +3 -0
  22. package/src/sdk-builder/get-sdk-sources.js +81 -44
  23. package/src/tasks/build-documentation.js +4 -0
  24. package/src/tasks/minify-html.js +1 -1
  25. package/src/tasks/validate-links-collect-worker.js +34 -0
  26. package/src/tasks/validate-links-worker.js +127 -0
  27. package/src/tasks/validate-links.js +61 -259
  28. package/themes/umberto/layout/gloria/_head/head.pug +3 -0
  29. package/themes/umberto/layout/gloria/_modules/index.pug +1 -0
  30. package/themes/umberto/layout/gloria/_modules/kapa/index.pug +0 -1
  31. package/themes/umberto/layout/gloria/_modules/sentry/index.pug +27 -0
  32. package/scripts/filter/after-post-render/gloria/append-copy-heading-buttons.js +0 -90
  33. package/scripts/filter/after-post-render/gloria/apply-design-doc-classes.js +0 -96
  34. package/scripts/filter/after-post-render/gloria/wrap-table-into-wrappers.js +0 -36
  35. package/scripts/filter/after-render/gloria/inline-svg.js +0 -14
  36. package/scripts/filter/after-render/gloria/spritesheet-svg.js +0 -14
  37. package/scripts/utils/apply-design-doc-classes.js +0 -82
  38. package/src/tasks/{minify-worker.js → minify-html-worker.js} +0 -0
package/CHANGELOG.md CHANGED
@@ -1,6 +1,27 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ ## [9.1.0](https://github.com/cksource/umberto/compare/v9.0.0...v9.1.0) (November 24, 2025)
5
+
6
+ ### Features
7
+
8
+ * Introduced support for [Sentry](https://sentry.io/welcome/) in the generated documentation.
9
+
10
+ Umberto configuration can now specify the `sentry` key that allows loading the Sentry SDK on production hosts: https://ckeditor.com/docs and https://ckeditor5.github.io/docs/nightly.
11
+
12
+ ### Other changes
13
+
14
+ * Improve performance of `api-builder` and `validate-links` scripts.
15
+ * Migrate from `cheerio` to `htmlparser2` and friends.
16
+
17
+
18
+ ## [9.0.0](https://github.com/cksource/umberto/compare/v8.4.0...v9.0.0) (November 13, 2025)
19
+
20
+ ### BREAKING CHANGES
21
+
22
+ * Updated the required version of Node.js to **v24.11**.
23
+
24
+
4
25
  ## [8.4.0](https://github.com/cksource/umberto/compare/v8.3.5...v8.4.0) (October 30, 2025)
5
26
 
6
27
  ### Features
@@ -29,22 +50,6 @@ Changelog
29
50
 
30
51
  * Bump dependencies (including `hexo` to version `8.0.0`).
31
52
 
32
-
33
- ## [8.3.3](https://github.com/cksource/umberto/compare/v8.3.2...v8.3.3) (October 15, 2025)
34
-
35
- ### Bug fixes
36
-
37
- * Link validator should not fail on links containing the word "latest", if they are not part of a version number.
38
- * Fixed links being unstyled in Kapa.ai answers.
39
- * Fix broken page sidebar layout on `959px` resolution.
40
-
41
-
42
- ## [8.3.2](https://github.com/cksource/umberto/compare/v8.3.1...v8.3.2) (September 16, 2025)
43
-
44
- ### Other changes
45
-
46
- * Update to TypeScript 5.3.
47
-
48
53
  ---
49
54
 
50
55
  To see all releases, visit the [release page](https://github.com/cksource/umberto/releases).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "umberto",
3
- "version": "8.4.0",
3
+ "version": "9.1.0",
4
4
  "description": "CKSource Documentation builder",
5
5
  "main": "src/index.js",
6
6
  "files": [
@@ -19,7 +19,10 @@
19
19
  "@ckeditor/jsdoc-plugins": "^43.0.0",
20
20
  "@minify-html/node": "^0.17.1",
21
21
  "babel-loader": "^10.0.0",
22
- "cheerio": "^1.0.0",
22
+ "css-select": "^6.0.0",
23
+ "dom-serializer": "^2.0.0",
24
+ "domhandler": "^5.0.3",
25
+ "domutils": "^3.2.2",
23
26
  "escape-string-regexp": "^4.0.0",
24
27
  "fs-extra": "^11.0.0",
25
28
  "fuse.js": "^7.1.0",
@@ -55,9 +58,7 @@
55
58
  "webpack": "^5.94.0"
56
59
  },
57
60
  "engines": {
58
- "node": ">=22.0.0",
59
- "pnpm": ">=10.14.0",
60
- "yarn": "\n\n┌─────────────────────────┐\n│ Hey, we use pnpm now! │\n└─────────────────────────┘\n\n"
61
+ "node": ">=24.11.0"
61
62
  },
62
63
  "author": "CKSource (http://cksource.com/)",
63
64
  "license": "MIT",
@@ -73,6 +74,6 @@
73
74
  ]
74
75
  },
75
76
  "hexo": {
76
- "version": "8.0.0"
77
+ "version": "8.1.1"
77
78
  }
78
- }
79
+ }
package/scripts/filter/after-post-render/fix-code-samples.js CHANGED
@@ -5,7 +5,10 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { default: render } = require( 'dom-serializer' );
10
+ const { selectAll, selectOne } = require( 'css-select' );
11
+ const { hasAttrib, getChildren } = require( 'domutils' );
9
12
 
10
13
  const regexp = /(?<=>)utf8_encoded_content:([0-9,]+)(?=<\/code><\/pre>)/g;
11
14
  const textDecoder = new TextDecoder();
@@ -23,28 +26,89 @@ hexo.extend.filter.register( 'after_post_render', page => {
23
26
  } );
24
27
  }
25
28
 
26
- const $ = cheerio.load( page.content, null, false );
29
+ // Only bother scanning/transforming <pre> blocks if they are present.
30
+ if ( page.content.indexOf( '<pre' ) === -1 ) {
31
+ return page;
32
+ }
27
33
 
28
- // This is an attempt to automatically discover code blocks of HTML and add 'html' class if needed.
29
- // It is needed for triple backtick ``` codeblocks in markdown which contain HTML but don't use ```html notation.
30
- if ( page.content.indexOf( '<pre' ) !== -1 ) {
31
- $( 'pre' ).each( function() {
32
- const codeEl = $( this ).find( 'code' );
34
+ const doc = parseDocument( page.content );
35
+ const preBlocks = selectAll( 'pre', doc );
33
36
 
34
- if ( codeEl.is( '.html,.js,.php,.css,.java' ) ) {
35
- return true;
36
- }
37
+ for ( const pre of preBlocks ) {
38
+ const code = selectOne( 'code', pre );
37
39
 
38
- const codeContent = codeEl.html();
39
- const matches = codeContent ? codeContent.match( /(?:<|&lt;)\/?[a-z]+?(?:&gt;|>)/g ) : null;
40
+ // If the code element declares a language we recognize, skip (parity with original `return true`).
41
+ if ( hasAnyClass( code, [ 'html', 'js', 'php', 'css', 'java' ] ) ) {
42
+ continue;
43
+ }
40
44
 
41
- if ( matches && matches.length >= 2 ) {
42
- codeEl.addClass( 'html' );
43
- }
45
+ // Inspect inner HTML of <code> (serialized children) to detect HTML-like patterns.
46
+ const codeInner = code ? render( getChildren( code ) || [] ) : null;
47
+ const matches = codeInner ? codeInner.match( /(?:<|&lt;)\/?[a-z]+?(?:&gt;|>)/g ) : null;
44
48
 
45
- $( this ).replaceWith( $( this ).toString().replace( /\t/g, ' ' ) );
46
- } );
49
+ if ( matches && matches.length >= 2 && code ) {
50
+ addClass( code, 'html' );
51
+ }
52
+
53
+ // Normalize tabs to 4 spaces within the <pre> subtree (mirrors string replace in original).
54
+ replaceTabsInTextNodes( pre );
47
55
  }
48
56
 
49
- page.content = $.html();
57
+ page.content = render( doc );
58
+ return page;
50
59
  } );
60
+
61
+ /**
62
+ * Adds a class to an element's "class" attribute if not already present.
63
+ */
64
+ function addClass( el, className ) {
65
+ if ( !el ) {
66
+ return;
67
+ }
68
+
69
+ const current = hasAttrib( el, 'class' ) ? el.attribs.class : '';
70
+ const parts = current ? current.split( /\s+/ ).filter( Boolean ) : [];
71
+
72
+ if ( !parts.includes( className ) ) {
73
+ parts.push( className );
74
+ el.attribs.class = parts.join( ' ' );
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Checks whether an element has *any* of the provided classes.
80
+ */
81
+ function hasAnyClass( el, classes ) {
82
+ if ( !el || !hasAttrib( el, 'class' ) ) {
83
+ return false;
84
+ }
85
+
86
+ const current = el.attribs.class || '';
87
+ const set = new Set( current.split( /\s+/ ).filter( Boolean ) );
88
+
89
+ for ( const cls of classes ) {
90
+ if ( set.has( cls ) ) {
91
+ return true;
92
+ }
93
+ }
94
+
95
+ return false;
96
+ }
97
+
98
+ /**
99
+ * Replaces tabs with four spaces in all text nodes within a subtree.
100
+ * Mirrors the original behavior of serializing the <pre> and doing a string replace.
101
+ */
102
+ function replaceTabsInTextNodes( root ) {
103
+ if ( !root || !root.children ) {
104
+ return;
105
+ }
106
+
107
+ for ( const child of root.children ) {
108
+ if ( child.type === 'text' && typeof child.data === 'string' ) {
109
+ child.data = child.data.replace( /\t/g, ' ' );
110
+ } else if ( child.children && child.children.length ) {
111
+ replaceTabsInTextNodes( child );
112
+ }
113
+ }
114
+ }
package/scripts/filter/after-post-render/gloria.js ADDED
@@ -0,0 +1,27 @@
1
+ /**
2
+ * @license Copyright (c) 2017-2025, CKSource Holding sp. z o.o. All rights reserved.
3
+ * For licensing, see LICENSE.md.
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { default: render } = require( 'dom-serializer' );
10
+
11
+ const appendCopyHeadingButtons = require( '../../utils/gloria-after-post-render/append-copy-heading-buttons' );
12
+ const applyDesignDocClasses = require( '../../utils/gloria-after-post-render/apply-design-doc-classes' );
13
+ const wrapTableIntoWrappers = require( '../../utils/gloria-after-post-render/wrap-table-into-wrappers' );
14
+
15
+ hexo.extend.filter.register( 'after_post_render', page => {
16
+ if ( page.projectTheme !== 'gloria' ) {
17
+ return page;
18
+ }
19
+
20
+ let doc = parseDocument( page.content );
21
+ doc = appendCopyHeadingButtons( doc );
22
+ doc = wrapTableIntoWrappers( doc );
23
+ doc = applyDesignDocClasses( doc );
24
+ page.content = render( doc );
25
+
26
+ return page;
27
+ }, 45 );
package/scripts/filter/after-post-render/insert-error-codes.js CHANGED
@@ -5,8 +5,12 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
9
8
  const upath = require( 'upath' );
9
+ const { parseDocument } = require( 'htmlparser2' );
10
+ const { default: render } = require( 'dom-serializer' );
11
+ const { selectAll } = require( 'css-select' );
12
+ const { getAttributeValue } = require( 'domutils' );
13
+ const applyDesignDocClasses = require( '../../utils/gloria-after-post-render/apply-design-doc-classes' );
10
14
 
11
15
  /**
12
16
  * Support @errors tag.
@@ -14,42 +18,46 @@ const upath = require( 'upath' );
14
18
  */
15
19
  hexo.extend.filter.register( 'after_post_render', page => {
16
20
  const errorsTagMatch = /{@errors}/.exec( page.content );
17
- // jscs:disable
18
- const relativeUrlHelper = hexo.extend.helper.store.relative_url;
19
- // jscs:enable
21
+ const errorsHtml = hexo.projectGlobals[ page.projectName ]?.errorsHtml;
20
22
 
21
- if ( !errorsTagMatch || !hexo.projectGlobals[ page.projectName ] || !hexo.projectGlobals[ page.projectName ].errorsHtml ) {
23
+ if ( !errorsTagMatch || !errorsHtml ) {
22
24
  return;
23
25
  }
24
26
 
25
- const errorsHtml = hexo.projectGlobals[ page.projectName ].errorsHtml;
26
- const $ = cheerio.load( errorsHtml, null, false );
27
+ const relativeUrlHelper = hexo.extend.helper.store.relative_url;
28
+ const doc = parseDocument( errorsHtml );
27
29
 
28
30
  // Errors are used in guides so links must be converted to have proper paths. Converting links to API here.
29
- $( 'a[href]' ).each( function() {
30
- const href = $( this ).attr( 'href' );
31
+ for ( const a of selectAll( 'a[href]', doc ) ) {
32
+ const href = getAttributeValue( a, 'href' );
31
33
 
32
- // Ignore external links, error anchors and links with / which means they're not links to api.
33
- if ( /[a-z:]*\/\//.exec( href ) || /^#.*/.exec( href ) || href.split( '/' ).length > 1 ) {
34
- return;
34
+ // Ignore external links, error anchors and links with / which means they're not links to API.
35
+ if ( /[a-z:]*\/\//.test( href ) || /^#/.test( href ) || href.split( '/' ).length > 1 ) {
36
+ continue;
35
37
  }
36
38
 
37
- const groups = hexo.projectGlobals[ page.projectName ].groups;
38
- const apiSlug = groups.find( g => g.id === 'api-reference' ).slug;
39
- const hrefPath = upath.join( page.BASE_PATH, apiSlug, href );
39
+ const groups = hexo.projectGlobals[ page.projectName ].groups || [];
40
+ const apiGroup = groups.find( g => g.id === 'api-reference' );
40
41
 
41
- $( this ).attr( 'href', relativeUrlHelper( page.path, hrefPath ) );
42
- } );
42
+ if ( !apiGroup?.slug ) {
43
+ continue;
44
+ }
45
+
46
+ const hrefPath = upath.join( page.BASE_PATH, apiGroup.slug, href );
47
+ a.attribs.href = relativeUrlHelper( page.path, hrefPath );
48
+ }
43
49
 
44
50
  // Converting links to guides created from {@glink}s.
45
- $( 'a[data-glink]' ).each( function() {
46
- const href = $( this ).attr( 'href' ).replace( '../', '' );
47
- const hrefPath = upath.join( page.BASE_PATH, href );
51
+ for ( const a of selectAll( 'a[data-glink]', doc ) ) {
52
+ const rawHref = getAttributeValue( a, 'href' ) || '';
53
+ const normalized = rawHref.replace( '../', '' );
54
+ const hrefPath = upath.join( page.BASE_PATH, normalized );
55
+
56
+ a.attribs.href = relativeUrlHelper( page.path, hrefPath );
57
+ }
48
58
 
49
- $( this ).attr( 'href', relativeUrlHelper( page.path, hrefPath ) );
50
- } );
59
+ applyDesignDocClasses( doc );
51
60
 
52
- page.content = page.content.replace( /<p[^>]*>{@errors}<\/p>/g, () => {
53
- return $.html();
54
- } );
55
- }, 39 );
61
+ // Insert processed errors HTML in place of the {@errors} paragraph.
62
+ page.content = page.content.replace( /<p[^>]*>{@errors}<\/p>/g, () => render( doc.children ) );
63
+ }, 46 );
package/scripts/filter/after-post-render/validate-after-render.js CHANGED
@@ -5,7 +5,8 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { selectOne } = require( 'css-select' );
9
10
 
10
11
  /**
11
12
  * Make sure each page has a title.
@@ -19,14 +20,34 @@ hexo.extend.filter.register( 'after_post_render', page => {
19
20
  return;
20
21
  }
21
22
 
22
- const $ = cheerio.load( page.content, null, false );
23
- const title = $( 'h1:not( .live-snippet h1 )' ).first();
23
+ const doc = parseDocument( page.content );
24
+ const titleEl = selectOne( 'h1:not(.live-snippet h1)', doc );
24
25
 
25
- if ( !title.length && !page.title ) {
26
+ if ( !titleEl && !page.title ) {
26
27
  console.error( `Document: ${ page.source } has no title and no h1 which could be used as a title.` );
27
28
  }
28
29
 
29
- if ( !page.title ) {
30
- page.title = title.children().remove().end().text().trim();
30
+ if ( !page.title && titleEl ) {
31
+ page.title = getOwnText( titleEl ).trim();
31
32
  }
32
33
  }, 32 );
34
+
35
+ /**
36
+ * Concatenates the data of all direct text-node children of `el`.
37
+ * (Equivalent to removing all child elements, then taking `.text()`.)
38
+ */
39
+ function getOwnText( el ) {
40
+ if ( !el || !el.children ) {
41
+ return '';
42
+ }
43
+
44
+ let out = '';
45
+
46
+ for ( const child of el.children ) {
47
+ if ( child.type === 'text' && typeof child.data === 'string' ) {
48
+ out += child.data;
49
+ }
50
+ }
51
+
52
+ return out;
53
+ }
package/scripts/filter/after-render/process-svg.js ADDED
@@ -0,0 +1,21 @@
1
+ /**
2
+ * @license Copyright (c) 2017-2025, CKSource Holding sp. z o.o. All rights reserved.
3
+ * For licensing, see LICENSE.md.
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { default: render } = require( 'dom-serializer' );
10
+ const inlineSvg = require( '../../utils/inline-svg' );
11
+ const spritesheetSvg = require( '../../utils/spritesheet-svg' );
12
+ const hexoManager = require( '../../../src/hexo-manager' );
13
+
14
+ hexo.extend.filter.register( 'after_render:html', html => {
15
+ let doc = parseDocument( html );
16
+
17
+ doc = spritesheetSvg( doc, hexoManager.hexo.theme_dir );
18
+ doc = inlineSvg( doc, hexoManager.hexo.theme_dir );
19
+
20
+ return render( doc );
21
+ }, 500 );
package/scripts/filter/before-post-render/gloria/render-post-render-pug-components.js CHANGED
@@ -5,7 +5,11 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { cloneNode } = require( 'domhandler' );
10
+ const { default: render } = require( 'dom-serializer' );
11
+ const { selectAll, selectOne } = require( 'css-select' );
12
+ const { getAttributeValue, getChildren, replaceElement } = require( 'domutils' );
9
13
  const createPrerenderPugTemplate = require( '../../../utils/pug-renderer/create-prerender-pug-template' );
10
14
 
11
15
  /**
@@ -42,16 +46,16 @@ const POST_RENDER_COMPONENTS = [
42
46
  }
43
47
  } );
44
48
 
45
- return $element => {
46
- const $code = $element.find( 'code' );
49
+ return element => {
50
+ const codeEl = selectOne( 'code', element );
47
51
 
48
- if ( !$code.length ) {
52
+ if ( !codeEl ) {
49
53
  return null;
50
54
  }
51
55
 
52
56
  // Get the language from the class (e.g., "language-js" -> "js")
53
- const language = $code.attr( 'class' ).replace( 'doc', '' ).trim();
54
- const code = $code.html();
57
+ const language = getAttributeValue( codeEl, 'class' ).replace( 'doc', '' ).trim();
58
+ const code = render( getChildren( codeEl ) || [] );
55
59
 
56
60
  return template( {
57
61
  code,
@@ -69,29 +73,36 @@ hexo.extend.filter.register( 'after_post_render', page => {
69
73
  }
70
74
 
71
75
  try {
72
- let $ = null;
76
+ let doc = null;
73
77
  let hasChanges = false;
74
78
 
75
- POST_RENDER_COMPONENTS.forEach( component => {
79
+ for ( const component of POST_RENDER_COMPONENTS ) {
76
80
  if ( component.fastCheck && !component.fastCheck( page.content ) ) {
77
- return;
81
+ continue;
78
82
  }
79
83
 
80
- $ = $ || cheerio.load( page.content, null, false );
84
+ doc ||= parseDocument( page.content );
81
85
 
82
- $( component.selector ).each( ( i, element ) => {
83
- const $element = $( element );
84
- const replacement = component.handler( $element, { hexo, page } );
86
+ for ( const el of selectAll( component.selector, doc ) ) {
87
+ const replacement = component.handler( el, { hexo, page } );
85
88
 
86
- if ( replacement ) {
87
- $element.replaceWith( replacement );
89
+ if ( !replacement ) {
90
+ continue;
91
+ }
92
+
93
+ // Parse the replacement HTML and swap it in (assumes a single root element from the component).
94
+ const replacementEl = parseFirstElement( replacement );
95
+
96
+ if ( replacementEl ) {
97
+ // Clone to avoid moving nodes across documents.
98
+ replaceElement( el, cloneNode( replacementEl, true ) );
88
99
  hasChanges = true;
89
100
  }
90
- } );
91
- } );
101
+ }
102
+ }
92
103
 
93
104
  if ( hasChanges ) {
94
- page.content = $.html();
105
+ page.content = render( doc );
95
106
  }
96
107
  } catch ( error ) {
97
108
  console.error( error );
@@ -100,3 +111,20 @@ hexo.extend.filter.register( 'after_post_render', page => {
100
111
 
101
112
  return page;
102
113
  }, 100 );
114
+
115
+ /**
116
+ * Parses an HTML fragment and returns the first element node (skips text/whitespace).
117
+ */
118
+ function parseFirstElement( html ) {
119
+ const frag = parseDocument( html );
120
+ // Prefer body children if present (some parsers wrap fragments).
121
+ const body = selectOne( 'body', frag );
122
+ const nodes = body ? body.children : frag.children;
123
+
124
+ for ( const n of nodes || [] ) {
125
+ if ( n && n.type === 'tag' ) {
126
+ return n;
127
+ }
128
+ }
129
+ return null;
130
+ }
package/scripts/helper/u-extract-and-cache-title.js CHANGED
@@ -5,7 +5,9 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { selectOne } = require( 'css-select' );
10
+
9
11
  const CACHED_TITLES = new Map();
10
12
 
11
13
  hexo.extend.helper.register( 'uExtractAndCacheTitle', ( cacheKey, data ) => {
@@ -17,14 +19,11 @@ hexo.extend.helper.register( 'uExtractAndCacheTitle', ( cacheKey, data ) => {
17
19
  return CACHED_TITLES.get( cacheKey );
18
20
  }
19
21
 
20
- const $ = cheerio.load( data, null, false );
21
- const title = $( 'h1:not( .live-snippet h1 )' ).first();
22
+ const doc = parseDocument( data );
23
+ const titleEl = selectOne( 'h1:not(.live-snippet h1)', doc );
24
+
22
25
  const formattedTitle = (
23
- title
24
- .children()
25
- .remove()
26
- .end()
27
- .text()
26
+ getOwnText( titleEl )
28
27
  .trim()
29
28
  .replace( /</g, '&lt;' )
30
29
  .replace( />/g, '&gt;' )
@@ -34,3 +33,23 @@ hexo.extend.helper.register( 'uExtractAndCacheTitle', ( cacheKey, data ) => {
34
33
 
35
34
  return formattedTitle;
36
35
  } );
36
+
37
+ /**
38
+ * Concatenates the data of all direct text-node children of `el`.
39
+ * (Equivalent to removing all child elements, then taking `.text()`.)
40
+ */
41
+ function getOwnText( el ) {
42
+ if ( !el || !el.children ) {
43
+ return '';
44
+ }
45
+
46
+ let out = '';
47
+
48
+ for ( const child of el.children ) {
49
+ if ( child.type === 'text' && typeof child.data === 'string' ) {
50
+ out += child.data;
51
+ }
52
+ }
53
+
54
+ return out;
55
+ }
package/scripts/helper/u-split-to-title-and-content.js CHANGED
@@ -5,20 +5,44 @@
5
5
 
6
6
  'use strict';
7
7
 
8
- const cheerio = require( 'cheerio' );
8
+ const { parseDocument } = require( 'htmlparser2' );
9
+ const { default: render } = require( 'dom-serializer' );
10
+ const { selectOne } = require( 'css-select' );
11
+ const { removeElement } = require( 'domutils' );
9
12
 
10
13
  hexo.extend.helper.register( 'uSplitToTitleAndContent', data => {
11
- const $ = cheerio.load( data, null, false );
12
- const title = $( 'h1:not( .live-snippet h1 )' ).first().remove();
14
+ const doc = parseDocument( data );
15
+ const titleEl = selectOne( 'h1:not(.live-snippet h1)', doc );
16
+
17
+ if ( titleEl ) {
18
+ removeElement( titleEl );
19
+ }
13
20
 
14
21
  return {
15
- title: title.children()
16
- .remove()
17
- .end()
18
- .text()
22
+ title: getOwnText( titleEl )
19
23
  .trim()
20
24
  .replace( /</g, '&lt;' )
21
25
  .replace( />/g, '&gt;' ),
22
- content: $.html()
26
+ content: render( doc )
23
27
  };
24
28
  } );
29
+
30
+ /**
31
+ * Concatenates the data of all direct text-node children of `el`.
32
+ * (Equivalent to removing all child elements, then taking `.text()`.)
33
+ */
34
+ function getOwnText( el ) {
35
+ if ( !el || !el.children ) {
36
+ return '';
37
+ }
38
+
39
+ let out = '';
40
+
41
+ for ( const child of el.children ) {
42
+ if ( child.type === 'text' && typeof child.data === 'string' ) {
43
+ out += child.data;
44
+ }
45
+ }
46
+
47
+ return out;
48
+ }