@adobe/helix-html-pipeline 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.eslintrc.cjs +33 -0
  2. package/.husky/pre-commit +4 -0
  3. package/.mocha-multi.json +6 -0
  4. package/.nycrc.json +10 -0
  5. package/.releaserc.cjs +16 -0
  6. package/CHANGELOG.md +6 -0
  7. package/CODE_OF_CONDUCT.md +74 -0
  8. package/CONTRIBUTING.md +74 -0
  9. package/LICENSE.txt +264 -0
  10. package/README.md +45 -0
  11. package/docs/API.md +12 -0
  12. package/package.json +101 -0
  13. package/src/PipelineContent.d.ts +69 -0
  14. package/src/PipelineContent.js +26 -0
  15. package/src/PipelineRequest.d.ts +26 -0
  16. package/src/PipelineRequest.js +36 -0
  17. package/src/PipelineResponse.d.ts +32 -0
  18. package/src/PipelineResponse.js +44 -0
  19. package/src/PipelineState.d.ts +72 -0
  20. package/src/PipelineState.js +42 -0
  21. package/src/PipelineStatusError.d.ts +14 -0
  22. package/src/PipelineStatusError.js +17 -0
  23. package/src/html-pipe.js +100 -0
  24. package/src/index.d.ts +98 -0
  25. package/src/index.js +18 -0
  26. package/src/json-pipe.js +87 -0
  27. package/src/steps/add-heading-ids.js +32 -0
  28. package/src/steps/create-page-blocks.js +78 -0
  29. package/src/steps/create-pictures.js +35 -0
  30. package/src/steps/extract-metadata.js +257 -0
  31. package/src/steps/fetch-config.js +42 -0
  32. package/src/steps/fetch-content.js +83 -0
  33. package/src/steps/fetch-metadata.js +53 -0
  34. package/src/steps/fix-sections.js +36 -0
  35. package/src/steps/folder-mapping.js +61 -0
  36. package/src/steps/get-metadata.js +170 -0
  37. package/src/steps/make-html.js +34 -0
  38. package/src/steps/parse-markdown.js +42 -0
  39. package/src/steps/removeHlxProps.js +34 -0
  40. package/src/steps/render-code.js +25 -0
  41. package/src/steps/render.js +158 -0
  42. package/src/steps/rewrite-blob-images.js +44 -0
  43. package/src/steps/rewrite-icons.js +93 -0
  44. package/src/steps/set-custom-response-headers.js +41 -0
  45. package/src/steps/set-x-surrogate-key-header.js +35 -0
  46. package/src/steps/split-sections.js +57 -0
  47. package/src/steps/stringify-response.js +39 -0
  48. package/src/steps/utils.js +107 -0
  49. package/src/utils/hast-util-to-dom.js +190 -0
  50. package/src/utils/heading-handler.js +42 -0
  51. package/src/utils/icon-handler.js +40 -0
  52. package/src/utils/json-filter.js +143 -0
  53. package/src/utils/last-modified.js +48 -0
  54. package/src/utils/link-handler.js +25 -0
  55. package/src/utils/mdast-to-vdom.js +323 -0
  56. package/src/utils/mdast-util-gfm-nolink.js +93 -0
  57. package/src/utils/path.js +103 -0
  58. package/src/utils/remark-gfm-nolink.js +128 -0
  59. package/src/utils/section-handler.js +69 -0
  60. package/src/utils/table-handler.js +27 -0
@@ -0,0 +1,87 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import fetchMetadata from './steps/fetch-metadata.js';
13
+ import setCustomResponseHeaders from './steps/set-custom-response-headers.js';
14
+ import { PipelineResponse } from './PipelineResponse.js';
15
+ import jsonFilter from './utils/json-filter.js';
16
+
17
/**
 * Runs the JSON pipeline: loads the requested `.json` resource from the content-bus
 * (falling back to the code-bus on a 404), filters it according to the `limit`, `offset`
 * and `sheet` query parameters and returns the decorated response.
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @returns {PipelineResponse}
 */
export async function jsonPipe(state, req) {
  const {
    log, owner, repo, ref, contentBusId, partition, s3Loader,
  } = state;
  const resourcePath = state.info.path;
  const query = req.url.searchParams;

  // `sheet` can be specified several times, so read it as a list whenever it is present
  const params = Object.fromEntries(query.entries());
  if (params.sheet) {
    params.sheet = query.getAll('sheet');
  }
  const { limit, offset, sheet } = params;

  if (!resourcePath.endsWith('.json')) {
    log.error('only json resources supported.');
    return new PipelineResponse('', {
      status: 400,
      headers: { 'x-error': 'only json resources supported.' },
    });
  }

  // content-bus first, code-bus as fallback
  state.timer?.update('json-fetch');
  const contentKey = `${contentBusId}/${partition}${resourcePath}`;
  let dataResponse = await s3Loader.getObject('helix-content-bus', contentKey);
  if (dataResponse.status === 404) {
    const codeKey = `${owner}/${repo}/${ref}${resourcePath}`;
    dataResponse = await s3Loader.getObject('helix-code-bus', codeKey);
  }

  // anything other than a 200 is handed back to the caller unchanged
  if (dataResponse.status !== 200) {
    return dataResponse;
  }

  // the data is delivered raw when none of the filter parameters was given
  const toInt = (value) => (value ? Number.parseInt(value, 10) : undefined);
  const raw = [limit, offset, sheet].every((value) => value === undefined);
  const response = jsonFilter(state, dataResponse.body, {
    limit: toInt(limit),
    offset: toInt(offset),
    sheet,
    raw,
  });

  // last-modified of the data only (metadata.json and helix-config.json do not influence it)
  const lastModified = dataResponse.headers.get('last-modified');
  if (lastModified) {
    response.headers.set('last-modified', lastModified);
  }

  // surrogate key: content-bus id + path, with all slashes replaced by underscores
  const surrogateKey = `${contentBusId}${resourcePath}`.replace(/\//g, '_');
  response.headers.set('x-surrogate-key', surrogateKey);

  // load metadata.json and apply the custom response headers
  await fetchMetadata(state, req, response);
  await setCustomResponseHeaders(state, req, response);

  return response;
}
@@ -0,0 +1,32 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
/**
 * Gives every heading (`h1` to `h6`) that has text but no `id` an `id` attribute. The
 * value is produced by the content's slugger from the trimmed heading text.
 * @type PipelineStep
 * @param {PipelineContent} content The current context of processing pipeline
 */
export default async function fixSections({ content }) {
  const { slugger, document } = content;
  // walk level by level (all h1, then all h2, ...) so the slugger receives the heading
  // texts in a stable order
  for (let level = 1; level <= 6; level += 1) {
    for (const $heading of document.querySelectorAll(`h${level}`)) {
      const text = $heading.id ? '' : $heading.textContent.trim();
      if (text) {
        $heading.setAttribute('id', slugger.slug(text));
      }
    }
  }
}
@@ -0,0 +1,78 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { toClassName } from './utils.js';
13
+
14
/**
 * Builds the "DIV representation" of a table: the first row provides the class name of
 * the outer div, every following row becomes a child div that holds one div per cell.
 * A table that consists of a single cell wrapping another table yields that nested table.
 * @param {Document} document
 * @param {HTMLTableElement} $table the table element
 * @returns {HTMLElement} the resulting div (or the nested table in the special case)
 */
function tableToDivs(document, $table) {
  const $block = document.createElement('div');

  // collect the rows through the table API; a query selector would also return the rows
  // of nested tables
  const { tHead, tBodies } = $table;
  const $allRows = tHead ? [...tHead.rows] : [];
  for (const $tbody of tBodies) {
    $allRows.push(...$tbody.rows);
  }
  if ($allRows.length === 0) {
    return $block;
  }
  const [$headerRow, ...$bodyRows] = $allRows;

  // special case: a single row with a single column that contains a nested table
  if ($bodyRows.length === 0 && $headerRow.cells.length === 1) {
    const $nestedTable = $headerRow.cells[0].querySelector(':scope table');
    if ($nestedTable) {
      return $nestedTable;
    }
  }

  // the class name is assembled from the non-empty header cell names
  const names = [];
  for (const $cell of Array.from($headerRow.cells)) {
    const name = toClassName($cell.textContent);
    if (name) {
      names.push(name);
    }
  }
  const blockClass = names.join('-');
  if (blockClass) {
    $block.classList.add(blockClass);
  }

  // one div per remaining row, one nested div per cell; the cell content is moved over
  $bodyRows.forEach(($row) => {
    const $cellDivs = Array.from($row.cells, ($cell) => {
      const $cellDiv = document.createElement('div');
      $cellDiv.append(...$cell.childNodes);
      return $cellDiv;
    });
    const $rowDiv = document.createElement('div');
    $rowDiv.append(...$cellDivs);
    $block.append($rowDiv);
  });
  return $block;
}
66
+
67
/**
 * Replaces every table that is a direct child of a top-level section div with its
 * page block ("DIV") representation.
 * see https://github.com/adobe/helix-pages/issues/638
 * @param context The current context of processing pipeline
 */
export default function createPageBlocks({ content }) {
  const { document } = content;
  for (const $table of document.querySelectorAll('body > div > table')) {
    const $block = tableToDivs(document, $table);
    const { parentNode } = $table;
    parentNode.replaceChild($block, $table);
  }
}
@@ -0,0 +1,35 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { optimizeImageURL } from './utils.js';
13
+
14
/**
 * Wraps every media image (`src` starting with `./media_`) into a `<picture>` element
 * that carries an additional `<source>` for small viewports.
 * @type PipelineStep
 * @param context The current context of processing pipeline
 */
export default async function createPictures({ content }) {
  const { document } = content;

  let index = 0;
  for (const $img of document.querySelectorAll('img[src^="./media_"]')) {
    const $picture = document.createElement('picture');
    const $source = document.createElement('source');
    const originalSrc = $img.getAttribute('src');

    // smaller rendition for narrow viewports
    $source.setAttribute('media', '(max-width: 400px)');
    $source.setAttribute('srcset', optimizeImageURL(originalSrc, 750));
    $picture.appendChild($source);

    // only the first image is loaded eagerly, all others lazily
    const loading = index > 0 ? 'lazy' : 'eager';
    $img.setAttribute('loading', loading);
    $img.setAttribute('src', optimizeImageURL(originalSrc, 2000));

    // put the picture where the image was and move the image into it
    $img.parentNode.insertBefore($picture, $img);
    $picture.appendChild($img);
    index += 1;
  }
}
@@ -0,0 +1,257 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { resolve } from 'url';
13
+ import { getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL } from './utils.js';
14
+
15
/**
 * Creates a meta name from the given text: the text is lower-cased and every character
 * other than digits, latin letters, `:` and `_` is replaced with `-`.
 * @param {string} text input text
 * @returns {string} the meta name
 */
function toMetaName(text) {
  const lowered = text.toLowerCase();
  return lowered.replace(/[^0-9a-z:_]/gi, '-');
}
25
+
26
/**
 * Splits a comma-separated string into its trimmed, non-empty entries.
 * @param {string} list A comma-separated list
 * @returns {string[]} The clean list
 */
function toList(list) {
  const items = [];
  for (const part of list.split(',')) {
    const item = part.trim();
    if (item) {
      items.push(item);
    }
  }
  return items;
}
37
+
38
/**
 * Reads a block as key/value configuration. Each direct child div of the block is a row
 * whose first child holds the name and whose second child holds the value. The value is
 * taken, in this order, from: paragraphs or list items (joined with `, `), the plain text
 * of the cell, the `href` of a link, the `src` of an image. Rows without a name or
 * without a value are ignored.
 * @param {HTMLDivElement} $block The block element
 * @returns {object} The block config
 */
function readBlockConfig($block) {
  const config = {};
  $block.querySelectorAll(':scope>div').forEach(($row) => {
    const { children } = $row;
    if (!children || !children[1]) {
      return;
    }
    const name = toMetaName(children[0].textContent);
    if (!name) {
      return;
    }
    const $valueCell = children[1];

    let value;
    const $first = $valueCell.hasChildNodes() ? $valueCell.firstElementChild : null;
    if ($first) {
      // multiple paragraphs or a list: join the individual entries
      let items;
      switch ($first.tagName) {
        case 'P':
          // contains a list of <p> paragraphs
          items = $valueCell.childNodes;
          break;
        case 'UL':
        case 'OL':
          // contains a list
          items = $valueCell.children[0].childNodes;
          break;
        default:
          break;
      }
      if (items) {
        value = Array.from(items, (item) => `${item.textContent}`).join(', ');
      }
    }

    if (!value) {
      // plain text content; a run of 3 spaces acts as separator
      value = $valueCell.textContent.trim().replace(/ {3}/g, ',');
    }

    if (!value) {
      // value inside a link
      const $link = $valueCell.querySelector('a');
      if ($link) {
        value = $link.getAttribute('href');
      }
    }

    if (!value) {
      // value inside an image
      const $image = $valueCell.querySelector('img');
      if ($image) {
        value = $image.getAttribute('src');
      }
    }

    // only keep non-empty values
    if (value) {
      config[name] = value;
    }
  });
  return config;
}
100
+
101
/**
 * Copies all truthy values of a metadata rule to the target, using the meta name of the
 * respective key. The `url` entry (the glob of the rule) is never copied.
 * @param {object} target object that receives the values
 * @param {object} obj the metadata rule
 */
function applyMetaRule(target, obj) {
  for (const [key, value] of Object.entries(obj)) {
    const metaKey = toMetaName(key);
    if (value && metaKey !== 'url') {
      target[metaKey] = value;
    }
  }
}
109
+
110
/**
 * Converts a glob into an anchored regular expression.
 * - `**` matches any sequence of characters (including `/`)
 * - `*` matches any sequence of digits, lower case latin letters, `-` and `.`
 * All other characters only match themselves. Regular expression metacharacters of the
 * glob are escaped, and the wildcards are translated in a single pass, so a literal `_`
 * or `.` in the glob is no longer turned into a wildcard.
 * @param {string} glob the glob
 * @returns {RegExp} expression matching the entire input against the glob
 */
function globToRegExp(glob) {
  const reString = glob
    // neutralize everything that has a special meaning in a regular expression (except `*`)
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    // translate the wildcards; `**` is tried first so that it is not read as two `*`
    .replace(/\*\*|\*/g, (wildcard) => (wildcard === '**' ? '.*' : '[0-9a-z-.]*'));
  return new RegExp(`^${reString}$`);
}
117
+
118
/**
 * Computes the global metadata for a path. Every rule whose `url` (also accepted as `URL`
 * or `Url`) either equals the path or, if it contains a `*`, matches it as a glob, is
 * applied in order; values of later rules override those of earlier ones.
 * @param {object[]} metaRules the metadata rules
 * @param {string} path the path to compute the metadata for
 * @returns {object} the metadata applicable to the path
 */
export function filterGlobalMetadata(metaRules, path) {
  const appliesToPath = (rule) => {
    const glob = rule.url || rule.URL || rule.Url;
    if (!glob || typeof glob !== 'string' || !/[0-9a-z-/*]/.test(glob)) {
      return false;
    }
    return glob.indexOf('*') >= 0
      ? globToRegExp(glob).test(path)
      : glob === path;
  };

  const metaConfig = {};
  metaRules
    .filter(appliesToPath)
    .forEach((rule) => applyMetaRule(metaConfig, rule));
  return metaConfig;
}
134
+
135
/**
 * Reads the metadata block (`div.metadata`) of the document, if there is one, and removes
 * the block from the document afterwards.
 * @param {HTMLDocument} document The document
 * @return {object} The metadata (empty if the document has no metadata block)
 */
function getLocalMetadata(document) {
  const $metaBlock = document.querySelector('body div.metadata');
  if (!$metaBlock) {
    return {};
  }
  const metaConfig = readBlockConfig($metaBlock);
  $metaBlock.remove();
  return metaConfig;
}
149
+
150
/**
 * Resolves the image URL against the page path and, if the result is a path on the same
 * site (starts with `/`), adds the image optimization parameters used for meta images
 * (width 1200, format `pjpg`). Other URLs are returned as resolved.
 * @param {string} pagePath The path of the requested page
 * @param {string} imgUrl The image URL
 * @returns The optimized image URL
 */
function optimizeMetaImage(pagePath, imgUrl) {
  const src = resolve(pagePath, imgUrl);
  return src.startsWith('/')
    ? optimizeImageURL(src, 1200, 'pjpg')
    : src;
}
163
+
164
/**
 * Computes the metadata of the page and stores it in `content.meta`. Global metadata
 * (from `state.metadata`) is overlaid with the metadata block of the document; missing
 * title, description and image are derived from the document itself.
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 */
export default function extractMetaData(state, req) {
  const { content } = state;
  const { meta, document } = content;

  // global metadata from the spreadsheet, overlaid with the local metadata of the document
  const metaConfig = Object.assign(
    filterGlobalMetadata(state.metadata, state.info.path),
    getLocalMetadata(document),
  );

  // the supported properties are moved to meta directly...
  const supported = [
    'title',
    'description',
    'keywords',
    'tags',
    'image',
    'image-alt',
    'canonical',
    'feed',
  ];
  for (const name of supported) {
    const value = metaConfig[name];
    if (value) {
      meta[name] = value;
      delete metaConfig[name];
    }
  }

  // ...and whatever is left ends up in meta.custom
  const customEntries = Object.entries(metaConfig);
  if (customEntries.length > 0) {
    meta.custom = customEntries.map(([name, value]) => ({
      name,
      value,
      property: name.includes(':'),
    }));
  }

  if (meta.keywords) {
    meta.keywords = toList(meta.keywords).join(', ');
  }
  if (meta.tags) {
    meta.tags = toList(meta.tags);
  }

  // complete metadata with insights from content
  if (!meta.title) {
    // content.title is not correct if the h1 is in a page-block since the pipeline
    // only respects the heading nodes in the mdast
    const $h1 = document.querySelector('body > div h1');
    if ($h1) {
      content.title = $h1.textContent;
    }
    meta.title = content.title;
  }

  if (!meta.description) {
    // description: the first paragraph with 10 or more words (or with a very long word
    // that is not a link), cut off after 25 words
    const isLongWord = (word) => word.length > 25 && !word.startsWith('http');
    let words = [];
    for (const $p of document.querySelectorAll('div > p')) {
      const candidate = $p.textContent.trim().split(/\s+/);
      if (candidate.length >= 10 || candidate.some(isLongWord)) {
        words = candidate;
        break;
      }
    }
    const ellipsis = words.length > 25 ? ' ...' : '';
    meta.description = `${words.slice(0, 25).join(' ')}${ellipsis}`;
  }

  meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.href));
  if (!meta.canonical) {
    meta.canonical = meta.url;
  }

  // content.image is not correct if the first image is in a page-block. since the pipeline
  // only respects the image nodes in the mdast
  const $hero = document.querySelector('body > div img');
  if ($hero) {
    content.image = $hero.src;
    if ($hero.alt) {
      content.imageAlt = $hero.alt;
    }
  }

  const image = meta.image || content.image || '/default-meta-image.png';
  meta.image = getAbsoluteUrl(req.headers, optimizeMetaImage(state.info.path, image));

  meta.imageAlt = meta['image-alt'] || content.imageAlt;
}
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
13
+ import { PipelineStatusError } from '../PipelineStatusError.js';
14
+
15
/**
 * Loads `helix-config.json` from the code-bus, stores the parsed config in
 * `state.helixConfig` and updates the last-modified of the response.
 * Throws a `PipelineStatusError` if the config cannot be loaded (404 if it does not
 * exist, 502 otherwise) or cannot be parsed (400).
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 */
export default async function fetchConfig(state, req, res) {
  const {
    log, owner, repo, ref,
  } = state;

  const configKey = `${owner}/${repo}/${ref}/helix-config.json`;
  const ret = await state.s3Loader.getObject('helix-code-bus', configKey);
  const { status } = ret;
  if (status !== 200) {
    // a missing config is reported as 404, every other failure as 502
    const code = status === 404 ? 404 : 502;
    throw new PipelineStatusError(code, `unable to load /helix-config.json: ${status}`);
  }

  let helixConfig;
  try {
    helixConfig = JSON.parse(ret.body);
  } catch (e) {
    log.info('failed to parse helix-config.json', e);
    throw new PipelineStatusError(400, `Failed parsing of /helix-config.json: ${e.message}`);
  }
  state.helixConfig = helixConfig;

  // also update last-modified
  updateLastModified(state, res, extractLastModified(ret.headers));
}
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Copyright 2022 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
13
+
14
/**
 * Loads the content from either the content-bus or code-bus (depending on
 * `state.content.sourceBus`) and stores it in `state.content.data`.
 *
 * - If the loaded object carries a redirect location, the response becomes a 301.
 * - If the content cannot be loaded, the response status is set to 404 (not found) or 502
 *   (any other failure) and `res.error` describes the problem.
 * - For a 404 response, the `404.html` of the code-bus, if present, is used as body.
 *
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 */
export default async function fetchContent(state, req, res) {
  const {
    log, contentBusId, info, partition, owner, repo, ref,
  } = state;

  const isCode = state.content.sourceBus === 'code';
  const key = isCode
    ? `${owner}/${repo}/${ref}/${info.resourcePath}`
    : `${contentBusId}/${partition}${info.resourcePath}`;
  const bucketId = isCode ? 'helix-code-bus' : 'helix-content-bus';

  const ret = await state.s3Loader.getObject(bucketId, key);

  // check for redirect
  const redirectLocation = ret.headers.get('x-amz-meta-redirect-location');
  if (redirectLocation) {
    res.status = 301;
    res.body = '';
    res.headers.set('location', redirectLocation);
    res.error = 'moved';
    return;
  }

  if (ret.status === 200) {
    state.content.data = ret.body;

    // store extra source location if present
    state.content.sourceLocation = ret.headers.get('x-amz-meta-x-source-location');
    log.info(`source-location: ${state.content.sourceLocation}`);

    updateLastModified(state, res, extractLastModified(ret.headers));

    // reject requests to /index *after* checking for redirects
    // (https://github.com/adobe/helix-pipeline-service/issues/290)
    if (state.info.originalFilename === 'index') {
      res.status = 404;
      res.error = `request to ${info.path} not allowed (no-index).`;
    }
  } else {
    // keep 404, but propagate others as 502
    res.status = ret.status === 404 ? 404 : 502;
    res.error = `failed to load ${info.path} from ${state.content.sourceBus}-bus: ${ret.status}`;
  }

  if (res.status === 404) {
    // try to load 404.html from code-bus
    const ret404 = await state.s3Loader.getObject('helix-code-bus', `${owner}/${repo}/${ref}/404.html`);
    if (ret404.status === 200) {
      // override last-modified if source-last-modified is set
      const lastModified = extractLastModified(ret404.headers);
      if (lastModified) {
        ret404.headers.set('last-modified', lastModified);
      }

      // keep 404 response status, but deliver the body of the 404.html (and not the body
      // of the content response that failed)
      res.body = ret404.body;
      res.headers.set('last-modified', ret404.headers.get('last-modified'));
      res.headers.set('content-type', 'text/html; charset=utf-8');
      res.headers.set('x-surrogate-key', `${ref}--${repo}--${owner}_404`);
    }
  }
}
@@ -0,0 +1,53 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { PipelineStatusError } from '../PipelineStatusError.js';
14
+ import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
15
+
16
/**
 * Loads the metadata.json from the content-bus and stores its `data` array in
 * `state.metadata`. The array is read from the `default` sheet if the JSON has one,
 * otherwise from the top level. A missing metadata.json (404) results in an empty array.
 *
 * Throws a `PipelineStatusError` with
 * - 400 if the metadata.json cannot be parsed or does not provide a `data` array
 *   (this includes JSON that is not an object, e.g. `null`)
 * - 502 if loading fails with a status other than 404
 *
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 */
export default async function fetchMetadata(state, req, res) {
  const { contentBusId, partition } = state;
  const key = `${contentBusId}/${partition}/metadata.json`;
  const ret = await state.s3Loader.getObject('helix-content-bus', key);
  if (ret.status === 200) {
    let json;
    try {
      json = JSON.parse(ret.body);
    } catch (e) {
      throw new PipelineStatusError(400, `failed parsing of /metadata.json: ${e.message}`);
    }

    // `null` is valid JSON: use optional access so it is rejected by the array check below
    // instead of failing with a TypeError
    const sheet = json?.default ?? json;
    const data = sheet?.data;
    if (!Array.isArray(data)) {
      throw new PipelineStatusError(400, 'failed loading of /metadata.json: data must be an array');
    }
    state.metadata = data;

    // also update last-modified
    updateLastModified(state, res, extractLastModified(ret.headers));
    return;
  }

  if (ret.status !== 404) {
    throw new PipelineStatusError(502, `failed to load /metadata.json: ${ret.status}`);
  }

  // ignore 404
  state.metadata = [];
}
+ }