npm - @adobe/helix-html-pipeline - Versions diffs - 1.1.3 → 1.3.0 - Mend

@adobe/helix-html-pipeline 1.1.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/CHANGELOG.md +21 -0
package/package.json +12 -10
package/src/PipelineContent.d.ts +8 -7
package/src/PipelineContent.js +2 -0
package/src/PipelineResponse.d.ts +6 -1
package/src/PipelineState.d.ts +3 -1
package/src/PipelineState.js +1 -0
package/src/forms-pipe.js +160 -0
package/src/html-pipe.js +7 -0
package/src/index.d.ts +13 -0
package/src/index.js +2 -0
package/src/options-pipe.js +37 -0
package/src/steps/add-heading-ids.js +14 -13
package/src/steps/create-page-blocks.js +28 -27
package/src/steps/create-pictures.js +16 -12
package/src/steps/extract-metadata.js +61 -44
package/src/steps/fix-sections.js +8 -9
package/src/steps/get-metadata.js +5 -4
package/src/steps/make-html.js +3 -14
package/src/steps/removeHlxProps.js +9 -10
package/src/steps/render.js +68 -116
package/src/steps/rewrite-blob-images.js +6 -24
package/src/steps/rewrite-icons.js +30 -44
package/src/steps/stringify-response.js +11 -11
package/src/steps/utils.js +26 -4
package/src/utils/{table-handler.js → hast-utils.js} +13 -15
package/src/utils/heading-handler.js +11 -24
package/src/utils/mdast-to-hast.js +60 -0
package/src/utils/path.js +4 -1
package/src/utils/section-handler.js +6 -4
package/src/utils/hast-util-to-dom.js +0 -190
package/src/utils/icon-handler.js +0 -40
package/src/utils/link-handler.js +0 -25
package/src/utils/mdast-to-vdom.js +0 -323

package/src/steps/extract-metadata.js CHANGED Viewed

@@ -9,10 +9,15 @@
  * OF ANY KIND, either express or implied. See the License for the specific language
  * governing permissions and limitations under the License.
  */
+import { selectAll, select } from 'hast-util-select';
+import { toString } from 'hast-util-to-string';
+import { remove } from 'unist-util-remove';
+import { visit, EXIT, CONTINUE } from 'unist-util-visit';
 import {
   getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL, resolveUrl,
 } from './utils.js';
 import { filterGlobalMetadata, toMetaName, ALLOWED_RESPONSE_HEADERS } from '../utils/metadata.js';
+import { childNodes } from '../utils/hast-utils.js';
 /**
  * Cleans up comma-separated string lists and returns an array.
@@ -28,55 +33,53 @@ function toList(list) {
 /**
  * Returns the config from a block element as object with key/value pairs.
- * @param {HTMLDivElement} $block The block element
+ * @param {Element} $block The block element
  * @returns {object} The block config
  */
 function readBlockConfig($block) {
   const config = {};
-  $block.querySelectorAll(':scope>div').forEach(($row) => {
-    if ($row.children && $row.children[1]) {
-      const name = toMetaName($row.children[0].textContent);
+  selectAll(':scope>div', $block).forEach(($row) => {
+    if ($row?.children[1]) {
+      const [$name, $value] = $row.children;
+      const name = toMetaName(toString($name));
       if (name) {
         let value;
-        if ($row.children[1].hasChildNodes() && $row.children[1].firstElementChild) {
+        const $firstChild = childNodes($value)[0];
+        if ($firstChild) {
           // check for multiple paragraph or a list
-          let childNodes;
-          const { tagName } = $row.children[1].firstElementChild;
-          if (tagName === 'P') {
+          let list;
+          const { tagName } = $firstChild;
+          if (tagName === 'p') {
             // contains a list of <p> paragraphs
-            childNodes = $row.children[1].childNodes;
-          } else if (tagName === 'UL' || tagName === 'OL') {
+            list = childNodes($value);
+          } else if (tagName === 'ul' || tagName === 'ol') {
             // contains a list
-            childNodes = $row.children[1].children[0].childNodes;
+            list = childNodes($firstChild);
           }
-          if (childNodes) {
-            value = '';
-            childNodes.forEach((child) => {
-              value += `${child.textContent}, `;
-            });
-            value = value.substring(0, value.length - 2);
+          if (list) {
+            value = list.map((child) => toString(child)).join(', ');
           }
         }
         if (!value) {
           // for text content only
-          value = $row.children[1].textContent.trim().replace(/ {3}/g, ',');
+          value = toString($value).trim().replace(/ {3}/g, ',');
         }
         if (!value) {
           // check for value inside link
-          const $a = $row.children[1].querySelector('a');
+          const $a = select('a', $value);
           if ($a) {
-            value = $a.getAttribute('href');
+            value = $a.properties.href;
           }
         }
         if (!value) {
           // check for value inside img
-          const $img = $row.children[1].querySelector('img');
+          const $img = select('img', $value);
           if ($img) {
             // strip query string
-            value = $img.getAttribute('src');
+            value = $img.properties.src;
           }
         }
         if (value) {
@@ -91,15 +94,17 @@ function readBlockConfig($block) {
 /**
  * Looks for metadata in the document.
- * @param {HTMLDocument} document The document
+ * @param {Root} document The hast document
  * @return {object} The metadata
  */
 function getLocalMetadata(document) {
   let metaConfig = {};
-  const metaBlock = document.querySelector('body div.metadata');
+  const metaBlock = select('div.metadata', document);
   if (metaBlock) {
     metaConfig = readBlockConfig(metaBlock);
-    metaBlock.remove();
+    // TODO: here we should also remove the parent div of the former table, otherwise it results
+    // TODO: in an empty <div></div>
+    remove(document, { cascade: false }, metaBlock);
   }
   return metaConfig;
 }
@@ -118,6 +123,27 @@ function optimizeMetaImage(pagePath, imgUrl) {
   return src;
 }
+/**
+ * Extracts the description from the document. Note that the selectAll('div > p') approach used
+ * with jsdom doesn't work as expected in hast.
+ * @param {Root} hast
+ * @see https://github.com/syntax-tree/unist/discussions/66
+ */
+function extractDescription(hast) {
+  let desc = '';
+  visit(hast, (node, idx, parent) => {
+    if (parent?.tagName === 'div' && node.tagName === 'p') {
+      const words = toString(node).trim().split(/\s+/);
+      if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
+        desc = `${words.slice(0, 25).join(' ')}${words.length > 25 ? ' ...' : ''}`;
+        return EXIT;
+      }
+    }
+    return CONTINUE;
+  });
+  return desc;
+}
 /**
  * Extracts the metadata and stores it in the content meta
  * @type PipelineStep
@@ -126,13 +152,13 @@ function optimizeMetaImage(pagePath, imgUrl) {
  */
 export default function extractMetaData(state, req) {
   const { content } = state;
-  const { meta, document } = content;
+  const { meta, hast } = content;
   // extract global metadata from spreadsheet, and overlay
   // with local metadata from document
   const metaConfig = Object.assign(
     filterGlobalMetadata(state.metadata, state.info.path),
-    getLocalMetadata(document),
+    getLocalMetadata(hast),
   );
   // first process supported metadata properties
@@ -173,25 +199,16 @@ export default function extractMetaData(state, req) {
   if (!meta.title) {
     // content.title is not correct if the h1 is in a page-block since the pipeline
     // only respects the heading nodes in the mdast
-    const $title = document.querySelector('body > div h1');
+    const $title = select('div h1', hast);
     if ($title) {
-      content.title = $title.textContent;
+      content.title = toString($title);
     }
     meta.title = content.title;
   }
   if (!meta.description) {
-    // description: text from paragraphs with 10 or more words
-    let desc = [];
-    document.querySelectorAll('div > p').forEach((p) => {
-      if (desc.length === 0) {
-        const words = p.textContent.trim().split(/\s+/);
-        if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
-          desc = desc.concat(words);
-        }
-      }
-    });
-    meta.description = `${desc.slice(0, 25).join(' ')}${desc.length > 25 ? ' ...' : ''}`;
+    meta.description = extractDescription(hast);
   }
   // use the req.url and not the state.info.path in case of folder mapping
   meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.pathname));
   if (!meta.canonical) {
@@ -200,11 +217,11 @@ export default function extractMetaData(state, req) {
   // content.image is not correct if the first image is in a page-block. since the pipeline
   // only respects the image nodes in the mdast
-  const $hero = document.querySelector('body > div img');
+  const $hero = select('div img', hast);
   if ($hero) {
-    content.image = $hero.src;
-    if ($hero.alt) {
-      content.imageAlt = $hero.alt;
+    content.image = $hero.properties.src;
+    if ($hero.properties.alt) {
+      content.imageAlt = $hero.properties.alt;
     }
   }

package/src/steps/fix-sections.js CHANGED Viewed

@@ -9,6 +9,8 @@
  * OF ANY KIND, either express or implied. See the License for the specific language
  * governing permissions and limitations under the License.
  */
+import { selectAll } from 'hast-util-select';
+import { h } from 'hastscript';
 import { wrapContent } from './utils.js';
 /**
@@ -17,20 +19,17 @@ import { wrapContent } from './utils.js';
  * @param {PipelineContent} content
  */
 export default async function fixSections({ content }) {
-  const { document } = content;
-  const $sections = document.querySelectorAll('body > div');
+  const { hast } = content;
+  const $sections = selectAll('div', hast);
   // if there are no sections wrap everything in a div with appropriate class names from meta
   if ($sections.length === 0) {
-    const $outerDiv = document.createElement('div');
+    const $outerDiv = h('div');
     if (content.meta && content.meta.class) {
-      content.meta.class.split(/[ ,]/)
+      $outerDiv.properties.className = content.meta.class.split(/[ ,]/)
         .map((c) => c.trim())
-        .filter((c) => !!c)
-        .forEach((c) => {
-          $outerDiv.classList.add(c);
-        });
+        .filter((c) => !!c);
     }
-    wrapContent($outerDiv, document.body);
+    wrapContent($outerDiv, hast);
   }
 }

package/src/steps/get-metadata.js CHANGED Viewed

@@ -11,6 +11,7 @@
  */
 import { select, selectAll } from 'unist-util-select';
 import { toString as plain } from 'mdast-util-to-string';
+import { rewriteBlobLink } from './utils.js';
 function yaml(section) {
   section.meta = selectAll('yaml', section)
@@ -39,13 +40,13 @@ function image(section) {
   // TODO: get a better measure of prominence than "first"
   const img = select('image', section);
   if (img) {
-    section.image = img.url;
+    section.image = rewriteBlobLink(img.url);
   }
 }
 /**
- * Construct the strings corresponding to the number of occurences per type.
- * @param {Object} typecounter Type as a key, number of occurences as value
+ * Construct the strings corresponding to the number of occurrences per type.
+ * @param {Object} typecounter Type as a key, number of occurrences as value
  */
 function constructTypes(typecounter) {
   const types = Object.keys(typecounter).map((type) => `has-${type}`); // has-{type}
@@ -71,7 +72,7 @@ function constructTypes(typecounter) {
  * 1. has-<type> for every type of content found in the section
  * 2. is-<type>-only for sections that have only content of type
  * 3. is-<type1>-<type2>-<type3> ranks the top three most common types of content
- * 4. nb-<type>-<nb_occurences> is the number of occurences per type
+ * 4. nb-<type>-<nb_occurrences> is the number of occurrences per type
  * @param {*} section
  */
 function sectiontype(section) {

package/src/steps/make-html.js CHANGED Viewed

@@ -9,9 +9,7 @@
  * OF ANY KIND, either express or implied. See the License for the specific language
  * governing permissions and limitations under the License.
  */
-import GithubSlugger from 'github-slugger';
-import VDOMTransformer from '../utils/mdast-to-vdom.js';
+import mdast2hast from '../utils/mdast-to-hast.js';
 /**
  * Converts the markdown AST to a hast tree and stores it in `content.hast`
@@ -19,16 +17,7 @@ import VDOMTransformer from '../utils/mdast-to-vdom.js';
  * @param {PipelineState} state
  */
 export default function html(state) {
-  const { log, content } = state;
+  const { content } = state;
   const { mdast } = content;
-  log.debug(`Turning Markdown into HTML from ${typeof mdast}`);
-  // initialize transformer
-  content.slugger = new GithubSlugger();
-  const transformer = new VDOMTransformer()
-    .withOptions({
-      slugger: content.slugger,
-    });
-  content.document = transformer
-    .withMdast(mdast)
-    .getDocument();
+  content.hast = mdast2hast(mdast, content.slugger);
 }

package/src/steps/removeHlxProps.js CHANGED Viewed

@@ -9,6 +9,8 @@
  * OF ANY KIND, either express or implied. See the License for the specific language
  * governing permissions and limitations under the License.
  */
+import { selectAll } from 'hast-util-select';
 /**
  * Cleans the response document by removing `hlx-` stuff
  * @param {PipelineState} state
@@ -17,18 +19,15 @@
  */
 export default function clean(state, req, res) {
   const { document } = res;
-  document.querySelectorAll('[class]').forEach((el) => {
-    // Remove all `hlx-*` classes on the elements
-    el.classList.value.split(' ')
-      .filter((cls) => cls.indexOf('hlx-') === 0)
-      .forEach((cls) => el.classList.remove(cls));
-    if (!el.classList.length) {
-      el.removeAttribute('class');
+  selectAll('[class]', document).forEach(({ properties }) => {
+    properties.className = properties.className.filter((name) => !name.startsWith('hlx-'));
+    if (properties.className.length === 0) {
+      delete properties.className;
     }
     // Remove all `data-hlx-*` attributes on these elements
-    Object.keys(el.dataset)
-      .filter((key) => key.match(/^hlx[A-Z]/))
-      .forEach((key) => delete el.dataset[key]);
+    Object.keys(properties)
+      .filter((key) => key.match(/^dataHlx[A-Z].*/))
+      .forEach((key) => delete properties[key]);
   });
 }

package/src/steps/render.js CHANGED Viewed

@@ -11,70 +11,27 @@
  */
 /* eslint-disable max-len */
-import { JSDOM } from 'jsdom';
-/*
-<!DOCTYPE html>
-<html data-sly-attribute="${content.document.documentElement.attributesMap}">
-<head>
-  <title>${content.meta.title}</title>
-  <link data-sly-test="${content.meta.url}" rel="canonical" href="${content.meta.url}"/>
-  <meta data-sly-test="${content.meta.description}" name="description" content="${content.meta.description}"/>
-  <meta data-sly-test="${content.meta.keywords}" name="keywords" content="${content.meta.keywords}"/>
-  <meta data-sly-test="${content.meta.title}" property="og:title" content="${content.meta.title}"/>
-  <meta data-sly-test="${content.meta.description}" property="og:description" content="${content.meta.description}"/>
-  <meta data-sly-test="${content.meta.url}" property="og:url" content="${content.meta.url}"/>
-  <meta data-sly-test="${content.meta.image}" property="og:image" content="${content.meta.image}"/>
-  <meta data-sly-test="${content.meta.image}" property="og:image:secure_url" content="${content.meta.image}"/>
-<sly data-sly-test="${content.meta.imageAlt}">
-  <meta data-sly-test="${content.meta.imageAlt}" property="og:image:alt" content="${content.meta.imageAlt}"/>
-</sly>
-  <meta data-sly-test="${content.meta.modifiedTime}" property="og:updated_time" content="${content.meta.modified_time}"/>
-<sly data-sly-test="${content.meta.tags}" data-sly-list.tag="${content.meta.tags}">
-  <meta property="article:tag" content="${tag}"/>
-</sly>
-  <meta data-sly-test="${content.meta.section}" property="article:section" content="${section}"/>
-  <meta data-sly-test="${content.meta.published_time}" property="article:published_time" content="${content.meta.published_time}"/>
-  <meta data-sly-test="${content.meta.modified_time}" property="article:modified_time" content="${content.meta.modified_time}"/>
-  <meta data-sly-test="${content.meta.title}" name="twitter:title" content="${content.meta.title}"/>
-  <meta data-sly-test="${content.meta.description}" name="twitter:description" content="${content.meta.description}"/>
-  <meta data-sly-test="${content.meta.image}" name="twitter:image" content="${content.meta.image}"/>
-<sly data-sly-test="${content.meta.custom}" data-sly-list="${content.meta.custom}">
-  <meta data-sly-test="${item.property}" property="${item.name}" content="${item.value}">
-  <meta data-sly-test="${!item.property}" name="${item.name}" content="${item.value}">
-</sly>
-  <esi:include src="/head.html" onerror="continue"/>
-</head>
-<body data-sly-attribute="${content.document.body.attributesMap}">
-  <!--  header -->
-  <header><esi:include src="/header.plain.html" onerror="continue"/></header>
-  <!--  main content -->
-  <main>${content.document.body}</main>
-  <!--  footer -->
-  <footer><esi:include src="/footer.plain.html"  onerror="continue"/></footer>
-</body>
-</html>
-*/
+import { h } from 'hastscript';
+import { unified } from 'unified';
+import rehypeParse from 'rehype-parse';
 function appendElement($parent, $el) {
   if ($el) {
-    $parent.append($el);
+    $parent.children.push($el);
   }
 }
-function createElement(doc, name, ...attrs) {
+function createElement(name, ...attrs) {
   // check for empty values
+  const properties = {};
   for (let i = 0; i < attrs.length; i += 2) {
-    if (!attrs[i + 1]) {
+    const value = attrs[i + 1];
+    if (!value) {
       return null;
     }
+    properties[attrs[i]] = value;
   }
-  const $el = doc.createElement(name);
-  for (let i = 0; i < attrs.length; i += 2) {
-    $el.setAttribute(attrs[i], attrs[i + 1]);
-  }
-  return $el;
+  return h(name, properties);
 }
 /**
@@ -86,73 +43,68 @@ function createElement(doc, name, ...attrs) {
  */
 export default async function render(state, req, res) {
   const { content } = state;
-  const srcDoc = content.document;
+  const { hast, meta } = content;
   if (state.info.selector === 'plain') {
     // just return body
-    res.document = srcDoc.body;
-  } else {
-    // create document like HTL used to do
-    const dom = new JSDOM('<!DOCTYPE html>'
-      + '<html>'
-      + '<head></head>'
-      + '<body>'
-      + '<header></header>' // todo: are those still required ?
-      + '<main></main>'
-      + '<footer></footer>' // todo: are those still required ?
-      + '</body>'
-      + '</html>');
-    const doc = dom.window.document;
-    // add title
-    const $head = doc.head;
-    const { meta } = content;
-    const $title = doc.createElement('title');
-    $title.innerHTML = meta.title;
-    $head.append($title);
-    // add meta
-    appendElement($head, createElement(doc, 'link', 'rel', 'canonical', 'href', content.meta.canonical));
-    appendElement($head, createElement(doc, 'meta', 'name', 'description', 'content', content.meta.description));
-    appendElement($head, createElement(doc, 'meta', 'name', 'keywords', 'content', content.meta.keywords));
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:title', 'content', content.meta.title));
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:description', 'content', content.meta.description));
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:url', 'content', content.meta.url));
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:image', 'content', content.meta.image));
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:image:secure_url', 'content', content.meta.image));
-    if (content.meta.imageAlt) {
-      appendElement($head, createElement(doc, 'meta', 'property', 'og:image:alt', 'content', content.meta.imageAlt));
-    }
-    appendElement($head, createElement(doc, 'meta', 'property', 'og:updated_time', 'content', content.meta.modified_time));
-    for (const tag of (meta.tags || [])) {
-      appendElement($head, createElement(doc, 'meta', 'property', 'article:tag', 'content', tag));
-    }
-    appendElement($head, createElement(doc, 'meta', 'property', 'article:section', 'content', content.meta.section));
-    appendElement($head, createElement(doc, 'meta', 'property', 'article:published_time', 'content', content.meta.published_time));
-    appendElement($head, createElement(doc, 'meta', 'property', 'article:modified_time', 'content', content.meta.modified_time));
-    appendElement($head, createElement(doc, 'meta', 'name', 'twitter:title', 'content', content.meta.title));
-    appendElement($head, createElement(doc, 'meta', 'name', 'twitter:description', 'content', content.meta.description));
-    appendElement($head, createElement(doc, 'meta', 'name', 'twitter:image', 'content', content.meta.image));
+    res.document = hast;
+    return;
+  }
+  const $head = h('head', [
+    h('title', meta.title),
+  ]);
-    for (const custom of (meta.custom || [])) {
-      appendElement($head, createElement(doc, 'meta', custom.property ? 'property' : 'name', custom.name, 'content', custom.value));
-    }
-    if (meta.feed) {
-      appendElement($head, createElement(doc, 'link', 'rel', 'alternate', 'type', 'application/xml+atom', 'href', meta.feed, 'title', `${meta.title} feed`));
-    }
-    // inject head.html
-    const $headHtml = doc.createElement('template');
-    $headHtml.innerHTML = state.helixConfig?.head?.html ?? `
-        <meta name="viewport" content="width=device-width, initial-scale=1"/>
-        <script src="/scripts.js" type="module" crossorigin="use-credentials"></script>
-        <link rel="stylesheet" href="/styles.css"/>`;
-    $head.appendChild($headHtml.content);
+  // add meta
+  appendElement($head, createElement('link', 'rel', 'canonical', 'href', content.meta.canonical));
+  appendElement($head, createElement('meta', 'name', 'description', 'content', content.meta.description));
+  appendElement($head, createElement('meta', 'name', 'keywords', 'content', content.meta.keywords));
+  appendElement($head, createElement('meta', 'property', 'og:title', 'content', content.meta.title));
+  appendElement($head, createElement('meta', 'property', 'og:description', 'content', content.meta.description));
+  appendElement($head, createElement('meta', 'property', 'og:url', 'content', content.meta.url));
+  appendElement($head, createElement('meta', 'property', 'og:image', 'content', content.meta.image));
+  appendElement($head, createElement('meta', 'property', 'og:image:secure_url', 'content', content.meta.image));
+  appendElement($head, createElement('meta', 'property', 'og:image:alt', 'content', content.meta.imageAlt));
+  appendElement($head, createElement('meta', 'property', 'og:updated_time', 'content', content.meta.modified_time));
+  for (const tag of (meta.tags || [])) {
+    appendElement($head, createElement('meta', 'property', 'article:tag', 'content', tag));
+  }
+  appendElement($head, createElement('meta', 'property', 'article:section', 'content', content.meta.section));
+  appendElement($head, createElement('meta', 'property', 'article:published_time', 'content', content.meta.published_time));
+  appendElement($head, createElement('meta', 'property', 'article:modified_time', 'content', content.meta.modified_time));
+  appendElement($head, createElement('meta', 'name', 'twitter:title', 'content', content.meta.title));
+  appendElement($head, createElement('meta', 'name', 'twitter:description', 'content', content.meta.description));
+  appendElement($head, createElement('meta', 'name', 'twitter:image', 'content', content.meta.image));
-    // add body to main
-    const $main = doc.querySelector('main');
+  for (const custom of (meta.custom || [])) {
+    appendElement($head, createElement('meta', custom.property ? 'property' : 'name', custom.name, 'content', custom.value));
+  }
+  appendElement($head, createElement('link', 'rel', 'alternate', 'type', 'application/xml+atom', 'href', meta.feed, 'title', `${meta.title} feed`));
-    $main.append(...srcDoc.body.childNodes);
-    res.document = doc;
+  // inject head.html
+  const headHtml = state.helixConfig?.head?.html;
+  if (headHtml) {
+    const $headHtml = await unified()
+      .use(rehypeParse, { fragment: true })
+      .parse(headHtml);
+    $head.children.push(...$headHtml.children);
+  } else {
+    appendElement($head, createElement('meta', 'name', 'viewport', 'content', 'width=device-width, initial-scale=1'));
+    appendElement($head, createElement('script', 'src', '/scripts.js', 'type', 'module', 'crossorigin', 'use-credentials'));
+    appendElement($head, createElement('link', 'rel', 'stylesheet', 'href', '/styles.css'));
   }
+  res.document = {
+    type: 'root',
+    children: [
+      { type: 'doctype' },
+      h('html', [
+        $head,
+        h('body', [
+          h('header', []), // todo: are those still required ?
+          h('main', hast),
+          h('footer', []), // todo: are those still required ?
+        ]),
+      ]),
+    ],
+  };
 }

package/src/steps/rewrite-blob-images.js CHANGED Viewed

@@ -9,36 +9,18 @@
  * OF ANY KIND, either express or implied. See the License for the specific language
  * governing permissions and limitations under the License.
  */
-const AZURE_BLOB_REGEXP = /^https:\/\/hlx\.blob\.core\.windows\.net\/external\//;
-const MEDIA_BLOB_REGEXP = /^https:\/\/.*\.hlx3?\.(live|page)\/media_.*/;
+import { selectAll } from 'hast-util-select';
+import { rewriteBlobLink } from './utils.js';
 /**
  * Rewrite blob store image URLs to /hlx_* URLs
  *
- * @param {Document} document The (vdom) document
- */
-function images(document) {
-  document.querySelectorAll('img').forEach((img) => {
-    if (AZURE_BLOB_REGEXP.test(img.src)) {
-      const { pathname, hash } = new URL(img.src);
-      const filename = pathname.split('/').pop();
-      const extension = hash.split('?').shift().split('.').pop() || 'jpg';
-      img.src = `./media_${filename}.${extension}`;
-    } else if (MEDIA_BLOB_REGEXP.test(img.src)) {
-      const { pathname } = new URL(img.src);
-      img.src = `.${pathname}`; // don't append fragment until picture tag supports width/height
-    }
-  });
-}
-/**
  * @type PipelineStep
  * @param content
  */
 export default function rewrite({ content }) {
-  if (content.document) {
-    images(content.document);
-  }
+  const { hast } = content;
+  selectAll('img', hast).forEach((img) => {
+    img.properties.src = rewriteBlobLink(img.properties.src);
+  });
 }