@adobe/helix-html-pipeline 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
1
+ # [1.2.0](https://github.com/adobe/helix-html-pipeline/compare/v1.1.3...v1.2.0) (2022-03-16)
2
+
3
+
4
+ ### Features
5
+
6
+ * use hast instead of jsdom ([#12](https://github.com/adobe/helix-html-pipeline/issues/12)) ([bee0a0b](https://github.com/adobe/helix-html-pipeline/commit/bee0a0b3309919f896520bc700dd2d867be19a1c)), closes [#11](https://github.com/adobe/helix-html-pipeline/issues/11)
7
+
8
+ ## [1.1.3](https://github.com/adobe/helix-html-pipeline/compare/v1.1.2...v1.1.3) (2022-03-12)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * **deps:** update dependency @adobe/helix-shared-utils to v2.0.5 ([4ea15f9](https://github.com/adobe/helix-html-pipeline/commit/4ea15f9888486ba0e81e92c7796236726da5b74c))
14
+
15
+ ## [1.1.2](https://github.com/adobe/helix-html-pipeline/compare/v1.1.1...v1.1.2) (2022-03-11)
16
+
17
+
18
+ ### Bug Fixes
19
+
20
+ * handling invalid input url with 400 ([#16](https://github.com/adobe/helix-html-pipeline/issues/16)) ([4491691](https://github.com/adobe/helix-html-pipeline/commit/449169107cc1d6a3b7b5fd211b39174d59fb2a8e)), closes [#15](https://github.com/adobe/helix-html-pipeline/issues/15)
21
+
1
22
  ## [1.1.1](https://github.com/adobe/helix-html-pipeline/compare/v1.1.0...v1.1.1) (2022-03-10)
2
23
 
3
24
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-html-pipeline",
3
- "version": "1.1.1",
3
+ "version": "1.2.0",
4
4
  "description": "Helix HTML Pipeline",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
@@ -33,10 +33,13 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "@adobe/helix-markdown-support": "3.1.2",
36
- "@adobe/helix-shared-utils": "2.0.4",
36
+ "@adobe/helix-shared-utils": "2.0.5",
37
37
  "github-slugger": "1.4.0",
38
+ "hast-util-raw": "7.2.1",
39
+ "hast-util-select": "5.0.1",
38
40
  "hast-util-to-html": "8.0.3",
39
- "jsdom": "19.0.0",
41
+ "hast-util-to-string": "2.0.0",
42
+ "hastscript": "7.0.2",
40
43
  "mdast-util-gfm-footnote": "1.0.1",
41
44
  "mdast-util-gfm-strikethrough": "1.0.1",
42
45
  "mdast-util-gfm-table": "1.0.3",
@@ -50,19 +53,21 @@
50
53
  "micromark-extension-gfm-task-list-item": "1.0.3",
51
54
  "micromark-util-combine-extensions": "1.0.0",
52
55
  "mime": "3.0.0",
53
- "property-information": "6.1.1",
56
+ "rehype-format": "4.0.1",
57
+ "rehype-minify-whitespace": "5.0.0",
58
+ "rehype-parse": "8.0.4",
54
59
  "remark-parse": "10.0.1",
55
60
  "strip-markdown": "5.0.0",
56
- "unified": "10.1.1",
61
+ "unified": "10.1.2",
57
62
  "unist-util-map": "3.0.0",
63
+ "unist-util-remove": "3.1.0",
58
64
  "unist-util-remove-position": "4.0.1",
59
65
  "unist-util-select": "4.0.1",
60
- "unist-util-visit": "4.1.0",
61
- "uri-js": "4.4.1"
66
+ "unist-util-visit": "4.1.0"
62
67
  },
63
68
  "devDependencies": {
64
69
  "@adobe/eslint-config-helix": "1.3.2",
65
- "@markedjs/html-differ": "4.0.0",
70
+ "@markedjs/html-differ": "4.0.1",
66
71
  "@semantic-release/changelog": "6.0.1",
67
72
  "@semantic-release/git": "10.0.1",
68
73
  "@semantic-release/npm": "9.0.1",
@@ -71,23 +76,20 @@
71
76
  "codecov": "3.8.3",
72
77
  "commitizen": "4.2.4",
73
78
  "cz-conventional-changelog": "3.3.0",
74
- "eslint": "8.10.0",
79
+ "eslint": "8.11.0",
75
80
  "eslint-plugin-header": "3.1.1",
76
81
  "eslint-plugin-import": "2.25.4",
77
82
  "esmock": "1.7.4",
78
- "hastscript": "7.0.2",
79
83
  "husky": "7.0.4",
80
- "hyperscript": "2.0.2",
81
84
  "js-yaml": "4.1.0",
82
85
  "jsdoc-to-markdown": "7.1.1",
86
+ "jsdom": "19.0.0",
83
87
  "junit-report-builder": "3.0.0",
84
88
  "lint-staged": "12.3.5",
85
- "mocha": "9.2.1",
89
+ "mocha": "9.2.2",
86
90
  "mocha-multi-reporters": "1.5.1",
87
91
  "remark-gfm": "3.0.1",
88
- "semantic-release": "19.0.2",
89
- "sinon": "13.0.1",
90
- "unist-builder": "3.0.0"
92
+ "semantic-release": "19.0.2"
91
93
  },
92
94
  "lint-staged": {
93
95
  "*.js": "eslint",
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import {Node} from "unist";
13
13
  import GithubSlugger from 'github-slugger';
14
+ import { Root } from 'hast';
14
15
 
15
16
  declare enum SourceType {
16
17
  CONTENT = 'content',
@@ -50,12 +51,9 @@ declare class PipelineContent {
50
51
  mdast: Node;
51
52
 
52
53
  /**
53
- * document specific metadata
54
+ * The transformed document (hast) representation
54
55
  */
55
- meta: object;
56
- title: string;
57
- intro: string;
58
- image: string;
56
+ hast: Root;
59
57
 
60
58
  /**
61
59
  * slugger to use for heading id calculations
@@ -63,7 +61,10 @@ declare class PipelineContent {
63
61
  slugger: GithubSlugger;
64
62
 
65
63
  /**
66
- * The transformed document (jsdom) representation
64
+ * document specific metadata
67
65
  */
68
- document: Document;
66
+ meta: object;
67
+ title: string;
68
+ intro: string;
69
+ image: string;
69
70
  }
@@ -9,6 +9,7 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import GithubSlugger from 'github-slugger';
12
13
 
13
14
  /**
14
15
  * State of the pipeline
@@ -21,6 +22,7 @@ export class PipelineContent {
21
22
  constructor() {
22
23
  Object.assign(this, {
23
24
  sourceBus: 'content',
25
+ slugger: new GithubSlugger(),
24
26
  });
25
27
  }
26
28
  }
@@ -9,6 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { Element } from 'hast';
13
+
12
14
  declare interface PipelineResponseInit {
13
15
  status?: number;
14
16
  headers: Map<string, string> | object;
@@ -17,7 +19,10 @@ declare interface PipelineResponseInit {
17
19
  declare class PipelineResponse {
18
20
  constructor(body?:string, init?:PipelineResponseInit);
19
21
  status: number;
20
- document?: Document;
22
+ /**
23
+ * The transformed document (hast) representation
24
+ */
25
+ document: Element;
21
26
  body: string;
22
27
  headers: Map<string, string>;
23
28
  error: any;
package/src/html-pipe.js CHANGED
@@ -62,11 +62,14 @@ export async function htmlPipe(state, req) {
62
62
  });
63
63
 
64
64
  try { // fetch config first, since we need to compute the content-bus-id from the fstab ...
65
+ state.timer?.update('config-fetch');
65
66
  await fetchConfig(state, req, res);
67
+
66
68
  // ...and apply the folder mapping
67
69
  await folderMapping(state, req, res);
68
70
 
69
71
  // load metadata and content in parallel
72
+ state.timer?.update('content-fetch');
70
73
  await Promise.all([
71
74
  fetchMetadata(state, req, res),
72
75
  fetchContent(state, req, res),
@@ -80,9 +83,12 @@ export async function htmlPipe(state, req) {
80
83
  }
81
84
 
82
85
  if (state.content.sourceBus === 'code') {
86
+ state.timer?.update('serialize');
83
87
  await renderCode(state, req, res);
84
88
  } else {
89
+ state.timer?.update('parse');
85
90
  await parseMarkdown(state);
91
+ state.timer?.update('render');
86
92
  await splitSections(state);
87
93
  await getMetadata(state); // this one extracts the metadata from the mdast
88
94
  await unwrapSoleImages(state);
@@ -96,6 +102,7 @@ export async function htmlPipe(state, req) {
96
102
  await addHeadingIds(state);
97
103
  await render(state, req, res);
98
104
  await removeHlxProps(state, req, res);
105
+ state.timer?.update('serialize');
99
106
  await tohtml(state, req, res);
100
107
  }
101
108
 
@@ -110,6 +117,13 @@ export async function htmlPipe(state, req) {
110
117
  }
111
118
  log.error(`error running pipeline: ${res.status} ${res.error}`, e);
112
119
  res.headers.set('x-error', cleanupHeaderValue(res.error));
120
+
121
+ // turn any URL errors into a 400, since they are user input
122
+ // see https://github.com/adobe/helix-pipeline-service/issues/346
123
+ if (e.code === 'ERR_INVALID_URL') {
124
+ res.status = 400;
125
+ res.headers.set('x-error', cleanupHeaderValue(`invalid url: ${e.input}`));
126
+ }
113
127
  }
114
128
 
115
129
  return res;
@@ -9,6 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { toString } from 'hast-util-to-string';
13
+ import { visit } from 'unist-util-visit';
12
14
 
13
15
  /**
14
16
  * Adds missing `id` attributes to the headings
@@ -16,17 +18,16 @@
16
18
  * @param {PipelineContent } content The current context of processing pipeline
17
19
  */
18
20
  export default async function fixSections({ content }) {
19
- const { slugger, document } = content;
20
- ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
21
- .forEach((tagName) => {
22
- document.querySelectorAll(tagName)
23
- .forEach(($h) => {
24
- if (!$h.id) {
25
- const text = $h.textContent.trim();
26
- if (text) {
27
- $h.setAttribute('id', slugger.slug(text));
28
- }
29
- }
30
- });
31
- });
21
+ const { slugger, hast } = content;
22
+ visit(hast, (node) => {
23
+ if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(node.tagName)) {
24
+ const { properties } = node;
25
+ if (!properties.id) {
26
+ const text = toString(node).trim();
27
+ if (text) {
28
+ properties.id = slugger.slug(text);
29
+ }
30
+ }
31
+ }
32
+ });
32
33
  }
@@ -9,57 +9,57 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { h } from 'hastscript';
13
+ import { selectAll, select } from 'hast-util-select';
14
+ import { toString } from 'hast-util-to-string';
12
15
  import { toClassName } from './utils.js';
16
+ import { replace, childNodes } from '../utils/hast-utils.js';
13
17
 
14
18
  /**
15
19
  * Creates a "DIV representation" of a table.
16
20
  * @type PipelineStep
17
- * @param {Document} document
18
- * @param {HTMLTableElement} $table the table element
21
+ * @param {HTMLTableElement} $table the table element
19
22
  * @returns {HTMLDivElement} the resulting div
20
23
  */
21
- function tableToDivs(document, $table) {
22
- const $cards = document.createElement('div');
23
-
24
- // iterate over the table to avoid problem with query selector and nested tables
24
+ function tableToDivs($table) {
25
+ const $cards = h('div');
25
26
  const $rows = [];
26
- if ($table.tHead) {
27
- $rows.push(...$table.tHead.rows);
28
- }
29
- for (const $tbody of $table.tBodies) {
30
- $rows.push(...$tbody.rows);
27
+ for (const child of $table.children) {
28
+ if (child.tagName === 'thead' || child.tagName === 'tbody') {
29
+ $rows.push(...childNodes(child));
30
+ }
31
31
  }
32
+
32
33
  if ($rows.length === 0) {
33
34
  return $cards;
34
35
  }
35
- const $headerRow = $rows.shift();
36
+ const $headerCols = childNodes($rows.shift());
36
37
 
37
38
  // special case, only 1 row and 1 column with a nested table
38
- if ($rows.length === 0 && $headerRow.cells.length === 1) {
39
- const $nestedTable = $headerRow.cells[0].querySelector(':scope table');
39
+ if ($rows.length === 0 && $headerCols.length === 1) {
40
+ const $nestedTable = select(':scope table', $headerCols[0]);
40
41
  if ($nestedTable) {
41
42
  return $nestedTable;
42
43
  }
43
44
  }
44
45
 
45
46
  // get columns names
46
- const clazz = Array.from($headerRow.cells)
47
- .map((e) => toClassName(e.textContent))
47
+ const clazz = $headerCols
48
+ .map((e) => toClassName(toString(e)))
48
49
  .filter((c) => !!c)
49
50
  .join('-');
50
51
  if (clazz) {
51
- $cards.classList.add(clazz);
52
+ $cards.properties.className = [clazz];
52
53
  }
53
54
 
54
55
  // construct page block
55
56
  for (const $row of $rows) {
56
- const $card = document.createElement('div');
57
- for (const $cell of $row.cells) {
58
- const $div = document.createElement('div');
59
- $div.append(...$cell.childNodes);
60
- $card.append($div);
57
+ const $card = h('div');
58
+ for (const $cell of childNodes($row)) {
59
+ // convert to div
60
+ $card.children.push(h('div', $cell.children));
61
61
  }
62
- $cards.append($card);
62
+ $cards.children.push($card);
63
63
  }
64
64
  return $cards;
65
65
  }
@@ -70,9 +70,10 @@ function tableToDivs(document, $table) {
70
70
  * @param context The current context of processing pipeline
71
71
  */
72
72
  export default function createPageBlocks({ content }) {
73
- const { document } = content;
74
- document.querySelectorAll('body > div > table').forEach(($table) => {
75
- const $div = tableToDivs(document, $table);
76
- $table.parentNode.replaceChild($div, $table);
73
+ const { hast } = content;
74
+ selectAll('div > table', hast).forEach(($table) => {
75
+ const $div = tableToDivs($table);
76
+ // replace child in parent
77
+ replace(hast, $table, $div);
77
78
  });
78
79
  }
@@ -9,6 +9,9 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { h } from 'hastscript';
13
+ import { selectAll } from 'hast-util-select';
14
+ import { replace } from '../utils/hast-utils.js';
12
15
  import { optimizeImageURL } from './utils.js';
13
16
 
14
17
  /**
@@ -17,19 +20,20 @@ import { optimizeImageURL } from './utils.js';
17
20
  * @param context The current context of processing pipeline
18
21
  */
19
22
  export default async function createPictures({ content }) {
20
- const { document } = content;
23
+ const { hast } = content;
21
24
 
22
25
  // transform <img> to <picture>
23
- document.querySelectorAll('img[src^="./media_"]').forEach((img, i) => {
24
- const picture = document.createElement('picture');
25
- const source = document.createElement('source');
26
- const src = img.getAttribute('src');
27
- source.setAttribute('media', '(max-width: 400px)');
28
- source.setAttribute('srcset', optimizeImageURL(src, 750));
29
- picture.appendChild(source);
30
- img.setAttribute('loading', i > 0 ? 'lazy' : 'eager'); // load all but first image lazy
31
- img.setAttribute('src', optimizeImageURL(src, 2000));
32
- img.parentNode.insertBefore(picture, img);
33
- picture.appendChild(img);
26
+ selectAll('img[src^="./media_"]', hast).forEach((img, i) => {
27
+ const { src } = img.properties;
28
+ const source = h('source');
29
+ source.properties.media = '(max-width: 400px)';
30
+ source.properties.srcset = optimizeImageURL(src, 750);
31
+
32
+ const picture = h('picture', source);
33
+ img.properties.loading = i > 0 ? 'lazy' : 'eager';
34
+ img.properties.src = optimizeImageURL(src, 2000);
35
+
36
+ replace(hast, img, picture);
37
+ picture.children.push(img);
34
38
  });
35
39
  }
@@ -9,10 +9,15 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { selectAll, select } from 'hast-util-select';
13
+ import { toString } from 'hast-util-to-string';
14
+ import { remove } from 'unist-util-remove';
15
+ import { visit, EXIT, CONTINUE } from 'unist-util-visit';
12
16
  import {
13
17
  getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL, resolveUrl,
14
18
  } from './utils.js';
15
19
  import { filterGlobalMetadata, toMetaName, ALLOWED_RESPONSE_HEADERS } from '../utils/metadata.js';
20
+ import { childNodes } from '../utils/hast-utils.js';
16
21
 
17
22
  /**
18
23
  * Cleans up comma-separated string lists and returns an array.
@@ -28,55 +33,53 @@ function toList(list) {
28
33
 
29
34
  /**
30
35
  * Returns the config from a block element as object with key/value pairs.
31
- * @param {HTMLDivElement} $block The block element
36
+ * @param {Element} $block The block element
32
37
  * @returns {object} The block config
33
38
  */
34
39
  function readBlockConfig($block) {
35
40
  const config = {};
36
- $block.querySelectorAll(':scope>div').forEach(($row) => {
37
- if ($row.children && $row.children[1]) {
38
- const name = toMetaName($row.children[0].textContent);
41
+ selectAll(':scope>div', $block).forEach(($row) => {
42
+ if ($row?.children[1]) {
43
+ const [$name, $value] = $row.children;
44
+ const name = toMetaName(toString($name));
39
45
  if (name) {
40
46
  let value;
41
- if ($row.children[1].hasChildNodes() && $row.children[1].firstElementChild) {
47
+ const $firstChild = childNodes($value)[0];
48
+ if ($firstChild) {
42
49
  // check for multiple paragraph or a list
43
- let childNodes;
44
- const { tagName } = $row.children[1].firstElementChild;
45
- if (tagName === 'P') {
50
+ let list;
51
+ const { tagName } = $firstChild;
52
+ if (tagName === 'p') {
46
53
  // contains a list of <p> paragraphs
47
- childNodes = $row.children[1].childNodes;
48
- } else if (tagName === 'UL' || tagName === 'OL') {
54
+ list = childNodes($value);
55
+ } else if (tagName === 'ul' || tagName === 'ol') {
49
56
  // contains a list
50
- childNodes = $row.children[1].children[0].childNodes;
57
+ list = childNodes($firstChild);
51
58
  }
52
59
 
53
- if (childNodes) {
54
- value = '';
55
- childNodes.forEach((child) => {
56
- value += `${child.textContent}, `;
57
- });
58
- value = value.substring(0, value.length - 2);
60
+ if (list) {
61
+ value = list.map((child) => toString(child)).join(', ');
59
62
  }
60
63
  }
61
64
 
62
65
  if (!value) {
63
66
  // for text content only
64
- value = $row.children[1].textContent.trim().replace(/ {3}/g, ',');
67
+ value = toString($value).trim().replace(/ {3}/g, ',');
65
68
  }
66
69
 
67
70
  if (!value) {
68
71
  // check for value inside link
69
- const $a = $row.children[1].querySelector('a');
72
+ const $a = select('a', $value);
70
73
  if ($a) {
71
- value = $a.getAttribute('href');
74
+ value = $a.properties.href;
72
75
  }
73
76
  }
74
77
  if (!value) {
75
78
  // check for value inside img
76
- const $img = $row.children[1].querySelector('img');
79
+ const $img = select('img', $value);
77
80
  if ($img) {
78
81
  // strip query string
79
- value = $img.getAttribute('src');
82
+ value = $img.properties.src;
80
83
  }
81
84
  }
82
85
  if (value) {
@@ -91,15 +94,17 @@ function readBlockConfig($block) {
91
94
 
92
95
  /**
93
96
  * Looks for metadata in the document.
94
- * @param {HTMLDocument} document The document
97
+ * @param {Root} document The hast document
95
98
  * @return {object} The metadata
96
99
  */
97
100
  function getLocalMetadata(document) {
98
101
  let metaConfig = {};
99
- const metaBlock = document.querySelector('body div.metadata');
102
+ const metaBlock = select('div.metadata', document);
100
103
  if (metaBlock) {
101
104
  metaConfig = readBlockConfig(metaBlock);
102
- metaBlock.remove();
105
+ // TODO: here we should also remove the parent div of the former table, otherwise it results
106
+ // TODO: in an empty <div></div>
107
+ remove(document, { cascade: false }, metaBlock);
103
108
  }
104
109
  return metaConfig;
105
110
  }
@@ -118,6 +123,27 @@ function optimizeMetaImage(pagePath, imgUrl) {
118
123
  return src;
119
124
  }
120
125
 
126
+ /**
127
+ * Extracts the description from the document. Note that the selectAll('div > p') used in
128
+ * jsdom doesn't work as expected in hast
129
+ * @param {Root} hast
130
+ * @see https://github.com/syntax-tree/unist/discussions/66
131
+ */
132
+ function extractDescription(hast) {
133
+ let desc = '';
134
+ visit(hast, (node, idx, parent) => {
135
+ if (parent?.tagName === 'div' && node.tagName === 'p') {
136
+ const words = toString(node).trim().split(/\s+/);
137
+ if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
138
+ desc = `${words.slice(0, 25).join(' ')}${words.length > 25 ? ' ...' : ''}`;
139
+ return EXIT;
140
+ }
141
+ }
142
+ return CONTINUE;
143
+ });
144
+ return desc;
145
+ }
146
+
121
147
  /**
122
148
  * Extracts the metadata and stores it in the content meta
123
149
  * @type PipelineStep
@@ -126,13 +152,13 @@ function optimizeMetaImage(pagePath, imgUrl) {
126
152
  */
127
153
  export default function extractMetaData(state, req) {
128
154
  const { content } = state;
129
- const { meta, document } = content;
155
+ const { meta, hast } = content;
130
156
 
131
157
  // extract global metadata from spreadsheet, and overlay
132
158
  // with local metadata from document
133
159
  const metaConfig = Object.assign(
134
160
  filterGlobalMetadata(state.metadata, state.info.path),
135
- getLocalMetadata(document),
161
+ getLocalMetadata(hast),
136
162
  );
137
163
 
138
164
  // first process supported metadata properties
@@ -173,25 +199,16 @@ export default function extractMetaData(state, req) {
173
199
  if (!meta.title) {
174
200
  // content.title is not correct if the h1 is in a page-block since the pipeline
175
201
  // only respects the heading nodes in the mdast
176
- const $title = document.querySelector('body > div h1');
202
+ const $title = select('div h1', hast);
177
203
  if ($title) {
178
- content.title = $title.textContent;
204
+ content.title = toString($title);
179
205
  }
180
206
  meta.title = content.title;
181
207
  }
182
208
  if (!meta.description) {
183
- // description: text from paragraphs with 10 or more words
184
- let desc = [];
185
- document.querySelectorAll('div > p').forEach((p) => {
186
- if (desc.length === 0) {
187
- const words = p.textContent.trim().split(/\s+/);
188
- if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
189
- desc = desc.concat(words);
190
- }
191
- }
192
- });
193
- meta.description = `${desc.slice(0, 25).join(' ')}${desc.length > 25 ? ' ...' : ''}`;
209
+ meta.description = extractDescription(hast);
194
210
  }
211
+
195
212
  // use the req.url and not the state.info.path in case of folder mapping
196
213
  meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.pathname));
197
214
  if (!meta.canonical) {
@@ -200,11 +217,11 @@ export default function extractMetaData(state, req) {
200
217
 
201
218
  // content.image is not correct if the first image is in a page-block. since the pipeline
202
219
  // only respects the image nodes in the mdast
203
- const $hero = document.querySelector('body > div img');
220
+ const $hero = select('div img', hast);
204
221
  if ($hero) {
205
- content.image = $hero.src;
206
- if ($hero.alt) {
207
- content.imageAlt = $hero.alt;
222
+ content.image = $hero.properties.src;
223
+ if ($hero.properties.alt) {
224
+ content.imageAlt = $hero.properties.alt;
208
225
  }
209
226
  }
210
227
 
@@ -9,6 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { selectAll } from 'hast-util-select';
13
+ import { h } from 'hastscript';
12
14
  import { wrapContent } from './utils.js';
13
15
 
14
16
  /**
@@ -17,20 +19,17 @@ import { wrapContent } from './utils.js';
17
19
  * @param {PipelineContent} content
18
20
  */
19
21
  export default async function fixSections({ content }) {
20
- const { document } = content;
21
- const $sections = document.querySelectorAll('body > div');
22
+ const { hast } = content;
23
+ const $sections = selectAll('div', hast);
22
24
 
23
25
  // if there are no sections wrap everything in a div with appropriate class names from meta
24
26
  if ($sections.length === 0) {
25
- const $outerDiv = document.createElement('div');
27
+ const $outerDiv = h('div');
26
28
  if (content.meta && content.meta.class) {
27
- content.meta.class.split(/[ ,]/)
29
+ $outerDiv.properties.className = content.meta.class.split(/[ ,]/)
28
30
  .map((c) => c.trim())
29
- .filter((c) => !!c)
30
- .forEach((c) => {
31
- $outerDiv.classList.add(c);
32
- });
31
+ .filter((c) => !!c);
33
32
  }
34
- wrapContent($outerDiv, document.body);
33
+ wrapContent($outerDiv, hast);
35
34
  }
36
35
  }