@adobe/helix-html-pipeline 1.1.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,15 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { selectAll, select } from 'hast-util-select';
13
+ import { toString } from 'hast-util-to-string';
14
+ import { remove } from 'unist-util-remove';
15
+ import { visit, EXIT, CONTINUE } from 'unist-util-visit';
12
16
  import {
13
17
  getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL, resolveUrl,
14
18
  } from './utils.js';
15
19
  import { filterGlobalMetadata, toMetaName, ALLOWED_RESPONSE_HEADERS } from '../utils/metadata.js';
20
+ import { childNodes } from '../utils/hast-utils.js';
16
21
 
17
22
  /**
18
23
  * Cleans up comma-separated string lists and returns an array.
@@ -28,55 +33,53 @@ function toList(list) {
28
33
 
29
34
  /**
30
35
  * Returns the config from a block element as object with key/value pairs.
31
- * @param {HTMLDivElement} $block The block element
36
+ * @param {Element} $block The block element
32
37
  * @returns {object} The block config
33
38
  */
34
39
  function readBlockConfig($block) {
35
40
  const config = {};
36
- $block.querySelectorAll(':scope>div').forEach(($row) => {
37
- if ($row.children && $row.children[1]) {
38
- const name = toMetaName($row.children[0].textContent);
41
+ selectAll(':scope>div', $block).forEach(($row) => {
42
+ if ($row?.children[1]) {
43
+ const [$name, $value] = $row.children;
44
+ const name = toMetaName(toString($name));
39
45
  if (name) {
40
46
  let value;
41
- if ($row.children[1].hasChildNodes() && $row.children[1].firstElementChild) {
47
+ const $firstChild = childNodes($value)[0];
48
+ if ($firstChild) {
42
49
  // check for multiple paragraph or a list
43
- let childNodes;
44
- const { tagName } = $row.children[1].firstElementChild;
45
- if (tagName === 'P') {
50
+ let list;
51
+ const { tagName } = $firstChild;
52
+ if (tagName === 'p') {
46
53
  // contains a list of <p> paragraphs
47
- childNodes = $row.children[1].childNodes;
48
- } else if (tagName === 'UL' || tagName === 'OL') {
54
+ list = childNodes($value);
55
+ } else if (tagName === 'ul' || tagName === 'ol') {
49
56
  // contains a list
50
- childNodes = $row.children[1].children[0].childNodes;
57
+ list = childNodes($firstChild);
51
58
  }
52
59
 
53
- if (childNodes) {
54
- value = '';
55
- childNodes.forEach((child) => {
56
- value += `${child.textContent}, `;
57
- });
58
- value = value.substring(0, value.length - 2);
60
+ if (list) {
61
+ value = list.map((child) => toString(child)).join(', ');
59
62
  }
60
63
  }
61
64
 
62
65
  if (!value) {
63
66
  // for text content only
64
- value = $row.children[1].textContent.trim().replace(/ {3}/g, ',');
67
+ value = toString($value).trim().replace(/ {3}/g, ',');
65
68
  }
66
69
 
67
70
  if (!value) {
68
71
  // check for value inside link
69
- const $a = $row.children[1].querySelector('a');
72
+ const $a = select('a', $value);
70
73
  if ($a) {
71
- value = $a.getAttribute('href');
74
+ value = $a.properties.href;
72
75
  }
73
76
  }
74
77
  if (!value) {
75
78
  // check for value inside img
76
- const $img = $row.children[1].querySelector('img');
79
+ const $img = select('img', $value);
77
80
  if ($img) {
78
81
  // strip query string
79
- value = $img.getAttribute('src');
82
+ value = $img.properties.src;
80
83
  }
81
84
  }
82
85
  if (value) {
@@ -91,15 +94,17 @@ function readBlockConfig($block) {
91
94
 
92
95
  /**
93
96
  * Looks for metadata in the document.
94
- * @param {HTMLDocument} document The document
97
+ * @param {Root} document The hast document
95
98
  * @return {object} The metadata
96
99
  */
97
100
  function getLocalMetadata(document) {
98
101
  let metaConfig = {};
99
- const metaBlock = document.querySelector('body div.metadata');
102
+ const metaBlock = select('div.metadata', document);
100
103
  if (metaBlock) {
101
104
  metaConfig = readBlockConfig(metaBlock);
102
- metaBlock.remove();
105
+ // TODO: here we should also remove the parent div of the former table, otherwise it results
106
+ // TODO: in an empty <div></div>
107
+ remove(document, { cascade: false }, metaBlock);
103
108
  }
104
109
  return metaConfig;
105
110
  }
@@ -118,6 +123,27 @@ function optimizeMetaImage(pagePath, imgUrl) {
118
123
  return src;
119
124
  }
120
125
 
126
+ /**
127
+ * Extracts the description from the document. note, that the selectAll('div > p') used in
128
+ * jsdom doesn't work as expected in hast
129
+ * @param {Root} hast
130
+ * @see https://github.com/syntax-tree/unist/discussions/66
131
+ */
132
+ function extractDescription(hast) {
133
+ let desc = '';
134
+ visit(hast, (node, idx, parent) => {
135
+ if (parent?.tagName === 'div' && node.tagName === 'p') {
136
+ const words = toString(node).trim().split(/\s+/);
137
+ if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
138
+ desc = `${words.slice(0, 25).join(' ')}${words.length > 25 ? ' ...' : ''}`;
139
+ return EXIT;
140
+ }
141
+ }
142
+ return CONTINUE;
143
+ });
144
+ return desc;
145
+ }
146
+
121
147
  /**
122
148
  * Extracts the metadata and stores it in the content meta
123
149
  * @type PipelineStep
@@ -126,13 +152,13 @@ function optimizeMetaImage(pagePath, imgUrl) {
126
152
  */
127
153
  export default function extractMetaData(state, req) {
128
154
  const { content } = state;
129
- const { meta, document } = content;
155
+ const { meta, hast } = content;
130
156
 
131
157
  // extract global metadata from spreadsheet, and overlay
132
158
  // with local metadata from document
133
159
  const metaConfig = Object.assign(
134
160
  filterGlobalMetadata(state.metadata, state.info.path),
135
- getLocalMetadata(document),
161
+ getLocalMetadata(hast),
136
162
  );
137
163
 
138
164
  // first process supported metadata properties
@@ -173,25 +199,16 @@ export default function extractMetaData(state, req) {
173
199
  if (!meta.title) {
174
200
  // content.title is not correct if the h1 is in a page-block since the pipeline
175
201
  // only respects the heading nodes in the mdast
176
- const $title = document.querySelector('body > div h1');
202
+ const $title = select('div h1', hast);
177
203
  if ($title) {
178
- content.title = $title.textContent;
204
+ content.title = toString($title);
179
205
  }
180
206
  meta.title = content.title;
181
207
  }
182
208
  if (!meta.description) {
183
- // description: text from paragraphs with 10 or more words
184
- let desc = [];
185
- document.querySelectorAll('div > p').forEach((p) => {
186
- if (desc.length === 0) {
187
- const words = p.textContent.trim().split(/\s+/);
188
- if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
189
- desc = desc.concat(words);
190
- }
191
- }
192
- });
193
- meta.description = `${desc.slice(0, 25).join(' ')}${desc.length > 25 ? ' ...' : ''}`;
209
+ meta.description = extractDescription(hast);
194
210
  }
211
+
195
212
  // use the req.url and not the state.info.path in case of folder mapping
196
213
  meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.pathname));
197
214
  if (!meta.canonical) {
@@ -200,11 +217,11 @@ export default function extractMetaData(state, req) {
200
217
 
201
218
  // content.image is not correct if the first image is in a page-block. since the pipeline
202
219
  // only respects the image nodes in the mdast
203
- const $hero = document.querySelector('body > div img');
220
+ const $hero = select('div img', hast);
204
221
  if ($hero) {
205
- content.image = $hero.src;
206
- if ($hero.alt) {
207
- content.imageAlt = $hero.alt;
222
+ content.image = $hero.properties.src;
223
+ if ($hero.properties.alt) {
224
+ content.imageAlt = $hero.properties.alt;
208
225
  }
209
226
  }
210
227
 
@@ -9,6 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { selectAll } from 'hast-util-select';
13
+ import { h } from 'hastscript';
12
14
  import { wrapContent } from './utils.js';
13
15
 
14
16
  /**
@@ -17,20 +19,17 @@ import { wrapContent } from './utils.js';
17
19
  * @param {PipelineContent} content
18
20
  */
19
21
  export default async function fixSections({ content }) {
20
- const { document } = content;
21
- const $sections = document.querySelectorAll('body > div');
22
+ const { hast } = content;
23
+ const $sections = selectAll('div', hast);
22
24
 
23
25
  // if there are no sections wrap everything in a div with appropriate class names from meta
24
26
  if ($sections.length === 0) {
25
- const $outerDiv = document.createElement('div');
27
+ const $outerDiv = h('div');
26
28
  if (content.meta && content.meta.class) {
27
- content.meta.class.split(/[ ,]/)
29
+ $outerDiv.properties.className = content.meta.class.split(/[ ,]/)
28
30
  .map((c) => c.trim())
29
- .filter((c) => !!c)
30
- .forEach((c) => {
31
- $outerDiv.classList.add(c);
32
- });
31
+ .filter((c) => !!c);
33
32
  }
34
- wrapContent($outerDiv, document.body);
33
+ wrapContent($outerDiv, hast);
35
34
  }
36
35
  }
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { select, selectAll } from 'unist-util-select';
13
13
  import { toString as plain } from 'mdast-util-to-string';
14
+ import { rewriteBlobLink } from './utils.js';
14
15
 
15
16
  function yaml(section) {
16
17
  section.meta = selectAll('yaml', section)
@@ -39,13 +40,13 @@ function image(section) {
39
40
  // TODO: get a better measure of prominence than "first"
40
41
  const img = select('image', section);
41
42
  if (img) {
42
- section.image = img.url;
43
+ section.image = rewriteBlobLink(img.url);
43
44
  }
44
45
  }
45
46
 
46
47
  /**
47
- * Construct the strings corresponding to the number of occurences per type.
48
- * @param {Object} typecounter Type as a key, number of occurences as value
48
+ * Construct the strings corresponding to the number of occurrences per type.
49
+ * @param {Object} typecounter Type as a key, number of occurrences as value
49
50
  */
50
51
  function constructTypes(typecounter) {
51
52
  const types = Object.keys(typecounter).map((type) => `has-${type}`); // has-{type}
@@ -71,7 +72,7 @@ function constructTypes(typecounter) {
71
72
  * 1. has-<type> for every type of content found in the section
72
73
  * 2. is-<type>-only for sections that have only content of type
73
74
  * 3. is-<type1>-<type2>-<type3> ranks the top three most common types of content
74
- * 4. nb-<type>-<nb_occurences> is the number of occurences per type
75
+ * 4. nb-<type>-<nb_occurrences> is the number of occurrences per type
75
76
  * @param {*} section
76
77
  */
77
78
  function sectiontype(section) {
@@ -9,9 +9,7 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
-
13
- import GithubSlugger from 'github-slugger';
14
- import VDOMTransformer from '../utils/mdast-to-vdom.js';
12
+ import mdast2hast from '../utils/mdast-to-hast.js';
15
13
 
16
14
  /**
17
15
  * Converts the markdown to a jsdom dom and stores it in `content.document`
@@ -19,16 +17,7 @@ import VDOMTransformer from '../utils/mdast-to-vdom.js';
19
17
  * @param {PipelineState} state
20
18
  */
21
19
  export default function html(state) {
22
- const { log, content } = state;
20
+ const { content } = state;
23
21
  const { mdast } = content;
24
- log.debug(`Turning Markdown into HTML from ${typeof mdast}`);
25
- // initialize transformer
26
- content.slugger = new GithubSlugger();
27
- const transformer = new VDOMTransformer()
28
- .withOptions({
29
- slugger: content.slugger,
30
- });
31
- content.document = transformer
32
- .withMdast(mdast)
33
- .getDocument();
22
+ content.hast = mdast2hast(mdast, content.slugger);
34
23
  }
@@ -9,6 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { selectAll } from 'hast-util-select';
13
+
12
14
  /**
13
15
  * Cleans the response document by removing `hlx-` stuff
14
16
  * @param {PipelineState} state
@@ -17,18 +19,15 @@
17
19
  */
18
20
  export default function clean(state, req, res) {
19
21
  const { document } = res;
20
- document.querySelectorAll('[class]').forEach((el) => {
21
- // Remove all `hlx-*` classes on the elements
22
- el.classList.value.split(' ')
23
- .filter((cls) => cls.indexOf('hlx-') === 0)
24
- .forEach((cls) => el.classList.remove(cls));
25
- if (!el.classList.length) {
26
- el.removeAttribute('class');
22
+ selectAll('[class]', document).forEach(({ properties }) => {
23
+ properties.className = properties.className.filter((name) => !name.startsWith('hlx-'));
24
+ if (properties.className.length === 0) {
25
+ delete properties.className;
27
26
  }
28
27
 
29
28
  // Remove all `data-hlx-*` attributes on these elements
30
- Object.keys(el.dataset)
31
- .filter((key) => key.match(/^hlx[A-Z]/))
32
- .forEach((key) => delete el.dataset[key]);
29
+ Object.keys(properties)
30
+ .filter((key) => key.match(/^dataHlx[A-Z].*/))
31
+ .forEach((key) => delete properties[key]);
33
32
  });
34
33
  }
@@ -11,70 +11,27 @@
11
11
  */
12
12
 
13
13
  /* eslint-disable max-len */
14
-
15
- import { JSDOM } from 'jsdom';
16
-
17
- /*
18
- <!DOCTYPE html>
19
- <html data-sly-attribute="${content.document.documentElement.attributesMap}">
20
- <head>
21
- <title>${content.meta.title}</title>
22
- <link data-sly-test="${content.meta.url}" rel="canonical" href="${content.meta.url}"/>
23
- <meta data-sly-test="${content.meta.description}" name="description" content="${content.meta.description}"/>
24
- <meta data-sly-test="${content.meta.keywords}" name="keywords" content="${content.meta.keywords}"/>
25
- <meta data-sly-test="${content.meta.title}" property="og:title" content="${content.meta.title}"/>
26
- <meta data-sly-test="${content.meta.description}" property="og:description" content="${content.meta.description}"/>
27
- <meta data-sly-test="${content.meta.url}" property="og:url" content="${content.meta.url}"/>
28
- <meta data-sly-test="${content.meta.image}" property="og:image" content="${content.meta.image}"/>
29
- <meta data-sly-test="${content.meta.image}" property="og:image:secure_url" content="${content.meta.image}"/>
30
- <sly data-sly-test="${content.meta.imageAlt}">
31
- <meta data-sly-test="${content.meta.imageAlt}" property="og:image:alt" content="${content.meta.imageAlt}"/>
32
- </sly>
33
- <meta data-sly-test="${content.meta.modifiedTime}" property="og:updated_time" content="${content.meta.modified_time}"/>
34
- <sly data-sly-test="${content.meta.tags}" data-sly-list.tag="${content.meta.tags}">
35
- <meta property="article:tag" content="${tag}"/>
36
- </sly>
37
- <meta data-sly-test="${content.meta.section}" property="article:section" content="${section}"/>
38
- <meta data-sly-test="${content.meta.published_time}" property="article:published_time" content="${content.meta.published_time}"/>
39
- <meta data-sly-test="${content.meta.modified_time}" property="article:modified_time" content="${content.meta.modified_time}"/>
40
- <meta data-sly-test="${content.meta.title}" name="twitter:title" content="${content.meta.title}"/>
41
- <meta data-sly-test="${content.meta.description}" name="twitter:description" content="${content.meta.description}"/>
42
- <meta data-sly-test="${content.meta.image}" name="twitter:image" content="${content.meta.image}"/>
43
- <sly data-sly-test="${content.meta.custom}" data-sly-list="${content.meta.custom}">
44
- <meta data-sly-test="${item.property}" property="${item.name}" content="${item.value}">
45
- <meta data-sly-test="${!item.property}" name="${item.name}" content="${item.value}">
46
- </sly>
47
- <esi:include src="/head.html" onerror="continue"/>
48
- </head>
49
- <body data-sly-attribute="${content.document.body.attributesMap}">
50
- <!-- header -->
51
- <header><esi:include src="/header.plain.html" onerror="continue"/></header>
52
- <!-- main content -->
53
- <main>${content.document.body}</main>
54
- <!-- footer -->
55
- <footer><esi:include src="/footer.plain.html" onerror="continue"/></footer>
56
- </body>
57
- </html>
58
- */
14
+ import { h } from 'hastscript';
15
+ import { unified } from 'unified';
16
+ import rehypeParse from 'rehype-parse';
59
17
 
60
18
  function appendElement($parent, $el) {
61
19
  if ($el) {
62
- $parent.append($el);
20
+ $parent.children.push($el);
63
21
  }
64
22
  }
65
23
 
66
- function createElement(doc, name, ...attrs) {
24
+ function createElement(name, ...attrs) {
67
25
  // check for empty values
26
+ const properties = {};
68
27
  for (let i = 0; i < attrs.length; i += 2) {
69
- if (!attrs[i + 1]) {
28
+ const value = attrs[i + 1];
29
+ if (!value) {
70
30
  return null;
71
31
  }
32
+ properties[attrs[i]] = value;
72
33
  }
73
- const $el = doc.createElement(name);
74
- for (let i = 0; i < attrs.length; i += 2) {
75
- $el.setAttribute(attrs[i], attrs[i + 1]);
76
- }
77
- return $el;
34
+ return h(name, properties);
78
35
  }
79
36
 
80
37
  /**
@@ -86,73 +43,68 @@ function createElement(doc, name, ...attrs) {
86
43
  */
87
44
  export default async function render(state, req, res) {
88
45
  const { content } = state;
89
- const srcDoc = content.document;
46
+ const { hast, meta } = content;
47
+
90
48
  if (state.info.selector === 'plain') {
91
49
  // just return body
92
- res.document = srcDoc.body;
93
- } else {
94
- // create document like HTL used to do
95
- const dom = new JSDOM('<!DOCTYPE html>'
96
- + '<html>'
97
- + '<head></head>'
98
- + '<body>'
99
- + '<header></header>' // todo: are those still required ?
100
- + '<main></main>'
101
- + '<footer></footer>' // todo: are those still required ?
102
- + '</body>'
103
- + '</html>');
104
- const doc = dom.window.document;
105
-
106
- // add title
107
- const $head = doc.head;
108
- const { meta } = content;
109
- const $title = doc.createElement('title');
110
- $title.innerHTML = meta.title;
111
- $head.append($title);
112
-
113
- // add meta
114
- appendElement($head, createElement(doc, 'link', 'rel', 'canonical', 'href', content.meta.canonical));
115
-
116
- appendElement($head, createElement(doc, 'meta', 'name', 'description', 'content', content.meta.description));
117
- appendElement($head, createElement(doc, 'meta', 'name', 'keywords', 'content', content.meta.keywords));
118
- appendElement($head, createElement(doc, 'meta', 'property', 'og:title', 'content', content.meta.title));
119
- appendElement($head, createElement(doc, 'meta', 'property', 'og:description', 'content', content.meta.description));
120
- appendElement($head, createElement(doc, 'meta', 'property', 'og:url', 'content', content.meta.url));
121
- appendElement($head, createElement(doc, 'meta', 'property', 'og:image', 'content', content.meta.image));
122
- appendElement($head, createElement(doc, 'meta', 'property', 'og:image:secure_url', 'content', content.meta.image));
123
- if (content.meta.imageAlt) {
124
- appendElement($head, createElement(doc, 'meta', 'property', 'og:image:alt', 'content', content.meta.imageAlt));
125
- }
126
- appendElement($head, createElement(doc, 'meta', 'property', 'og:updated_time', 'content', content.meta.modified_time));
127
- for (const tag of (meta.tags || [])) {
128
- appendElement($head, createElement(doc, 'meta', 'property', 'article:tag', 'content', tag));
129
- }
130
- appendElement($head, createElement(doc, 'meta', 'property', 'article:section', 'content', content.meta.section));
131
- appendElement($head, createElement(doc, 'meta', 'property', 'article:published_time', 'content', content.meta.published_time));
132
- appendElement($head, createElement(doc, 'meta', 'property', 'article:modified_time', 'content', content.meta.modified_time));
133
-
134
- appendElement($head, createElement(doc, 'meta', 'name', 'twitter:title', 'content', content.meta.title));
135
- appendElement($head, createElement(doc, 'meta', 'name', 'twitter:description', 'content', content.meta.description));
136
- appendElement($head, createElement(doc, 'meta', 'name', 'twitter:image', 'content', content.meta.image));
50
+ res.document = hast;
51
+ return;
52
+ }
53
+ const $head = h('head', [
54
+ h('title', meta.title),
55
+ ]);
137
56
 
138
- for (const custom of (meta.custom || [])) {
139
- appendElement($head, createElement(doc, 'meta', custom.property ? 'property' : 'name', custom.name, 'content', custom.value));
140
- }
141
- if (meta.feed) {
142
- appendElement($head, createElement(doc, 'link', 'rel', 'alternate', 'type', 'application/xml+atom', 'href', meta.feed, 'title', `${meta.title} feed`));
143
- }
144
- // inject head.html
145
- const $headHtml = doc.createElement('template');
146
- $headHtml.innerHTML = state.helixConfig?.head?.html ?? `
147
- <meta name="viewport" content="width=device-width, initial-scale=1"/>
148
- <script src="/scripts.js" type="module" crossorigin="use-credentials"></script>
149
- <link rel="stylesheet" href="/styles.css"/>`;
150
- $head.appendChild($headHtml.content);
57
+ // add meta
58
+ appendElement($head, createElement('link', 'rel', 'canonical', 'href', content.meta.canonical));
59
+ appendElement($head, createElement('meta', 'name', 'description', 'content', content.meta.description));
60
+ appendElement($head, createElement('meta', 'name', 'keywords', 'content', content.meta.keywords));
61
+ appendElement($head, createElement('meta', 'property', 'og:title', 'content', content.meta.title));
62
+ appendElement($head, createElement('meta', 'property', 'og:description', 'content', content.meta.description));
63
+ appendElement($head, createElement('meta', 'property', 'og:url', 'content', content.meta.url));
64
+ appendElement($head, createElement('meta', 'property', 'og:image', 'content', content.meta.image));
65
+ appendElement($head, createElement('meta', 'property', 'og:image:secure_url', 'content', content.meta.image));
66
+ appendElement($head, createElement('meta', 'property', 'og:image:alt', 'content', content.meta.imageAlt));
67
+ appendElement($head, createElement('meta', 'property', 'og:updated_time', 'content', content.meta.modified_time));
68
+ for (const tag of (meta.tags || [])) {
69
+ appendElement($head, createElement('meta', 'property', 'article:tag', 'content', tag));
70
+ }
71
+ appendElement($head, createElement('meta', 'property', 'article:section', 'content', content.meta.section));
72
+ appendElement($head, createElement('meta', 'property', 'article:published_time', 'content', content.meta.published_time));
73
+ appendElement($head, createElement('meta', 'property', 'article:modified_time', 'content', content.meta.modified_time));
74
+ appendElement($head, createElement('meta', 'name', 'twitter:title', 'content', content.meta.title));
75
+ appendElement($head, createElement('meta', 'name', 'twitter:description', 'content', content.meta.description));
76
+ appendElement($head, createElement('meta', 'name', 'twitter:image', 'content', content.meta.image));
151
77
 
152
- // add body to main
153
- const $main = doc.querySelector('main');
78
+ for (const custom of (meta.custom || [])) {
79
+ appendElement($head, createElement('meta', custom.property ? 'property' : 'name', custom.name, 'content', custom.value));
80
+ }
81
+ appendElement($head, createElement('link', 'rel', 'alternate', 'type', 'application/xml+atom', 'href', meta.feed, 'title', `${meta.title} feed`));
154
82
 
155
- $main.append(...srcDoc.body.childNodes);
156
- res.document = doc;
83
+ // inject head.html
84
+ const headHtml = state.helixConfig?.head?.html;
85
+ if (headHtml) {
86
+ const $headHtml = await unified()
87
+ .use(rehypeParse, { fragment: true })
88
+ .parse(headHtml);
89
+ $head.children.push(...$headHtml.children);
90
+ } else {
91
+ appendElement($head, createElement('meta', 'name', 'viewport', 'content', 'width=device-width, initial-scale=1'));
92
+ appendElement($head, createElement('script', 'src', '/scripts.js', 'type', 'module', 'crossorigin', 'use-credentials'));
93
+ appendElement($head, createElement('link', 'rel', 'stylesheet', 'href', '/styles.css'));
157
94
  }
95
+
96
+ res.document = {
97
+ type: 'root',
98
+ children: [
99
+ { type: 'doctype' },
100
+ h('html', [
101
+ $head,
102
+ h('body', [
103
+ h('header', []), // todo: are those still required ?
104
+ h('main', hast),
105
+ h('footer', []), // todo: are those still required ?
106
+ ]),
107
+ ]),
108
+ ],
109
+ };
158
110
  }
@@ -9,36 +9,18 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
-
13
- const AZURE_BLOB_REGEXP = /^https:\/\/hlx\.blob\.core\.windows\.net\/external\//;
14
-
15
- const MEDIA_BLOB_REGEXP = /^https:\/\/.*\.hlx3?\.(live|page)\/media_.*/;
12
+ import { selectAll } from 'hast-util-select';
13
+ import { rewriteBlobLink } from './utils.js';
16
14
 
17
15
  /**
18
16
  * Rewrite blob store image URLs to /hlx_* URLs
19
17
  *
20
- * @param {Document} document The (vdom) document
21
- */
22
- function images(document) {
23
- document.querySelectorAll('img').forEach((img) => {
24
- if (AZURE_BLOB_REGEXP.test(img.src)) {
25
- const { pathname, hash } = new URL(img.src);
26
- const filename = pathname.split('/').pop();
27
- const extension = hash.split('?').shift().split('.').pop() || 'jpg';
28
- img.src = `./media_${filename}.${extension}`;
29
- } else if (MEDIA_BLOB_REGEXP.test(img.src)) {
30
- const { pathname } = new URL(img.src);
31
- img.src = `.${pathname}`; // don't append fragment until picture tag supports width/height
32
- }
33
- });
34
- }
35
-
36
- /**
37
18
  * @type PipelineStep
38
19
  * @param content
39
20
  */
40
21
  export default function rewrite({ content }) {
41
- if (content.document) {
42
- images(content.document);
43
- }
22
+ const { hast } = content;
23
+ selectAll('img', hast).forEach((img) => {
24
+ img.properties.src = rewriteBlobLink(img.properties.src);
25
+ });
44
26
  }