@adobe/helix-html-pipeline 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.cjs +33 -0
- package/.husky/pre-commit +4 -0
- package/.mocha-multi.json +6 -0
- package/.nycrc.json +10 -0
- package/.releaserc.cjs +16 -0
- package/CHANGELOG.md +6 -0
- package/CODE_OF_CONDUCT.md +74 -0
- package/CONTRIBUTING.md +74 -0
- package/LICENSE.txt +264 -0
- package/README.md +45 -0
- package/docs/API.md +12 -0
- package/package.json +101 -0
- package/src/PipelineContent.d.ts +69 -0
- package/src/PipelineContent.js +26 -0
- package/src/PipelineRequest.d.ts +26 -0
- package/src/PipelineRequest.js +36 -0
- package/src/PipelineResponse.d.ts +32 -0
- package/src/PipelineResponse.js +44 -0
- package/src/PipelineState.d.ts +72 -0
- package/src/PipelineState.js +42 -0
- package/src/PipelineStatusError.d.ts +14 -0
- package/src/PipelineStatusError.js +17 -0
- package/src/html-pipe.js +100 -0
- package/src/index.d.ts +98 -0
- package/src/index.js +18 -0
- package/src/json-pipe.js +87 -0
- package/src/steps/add-heading-ids.js +32 -0
- package/src/steps/create-page-blocks.js +78 -0
- package/src/steps/create-pictures.js +35 -0
- package/src/steps/extract-metadata.js +257 -0
- package/src/steps/fetch-config.js +42 -0
- package/src/steps/fetch-content.js +83 -0
- package/src/steps/fetch-metadata.js +53 -0
- package/src/steps/fix-sections.js +36 -0
- package/src/steps/folder-mapping.js +61 -0
- package/src/steps/get-metadata.js +170 -0
- package/src/steps/make-html.js +34 -0
- package/src/steps/parse-markdown.js +42 -0
- package/src/steps/removeHlxProps.js +34 -0
- package/src/steps/render-code.js +25 -0
- package/src/steps/render.js +158 -0
- package/src/steps/rewrite-blob-images.js +44 -0
- package/src/steps/rewrite-icons.js +93 -0
- package/src/steps/set-custom-response-headers.js +41 -0
- package/src/steps/set-x-surrogate-key-header.js +35 -0
- package/src/steps/split-sections.js +57 -0
- package/src/steps/stringify-response.js +39 -0
- package/src/steps/utils.js +107 -0
- package/src/utils/hast-util-to-dom.js +190 -0
- package/src/utils/heading-handler.js +42 -0
- package/src/utils/icon-handler.js +40 -0
- package/src/utils/json-filter.js +143 -0
- package/src/utils/last-modified.js +48 -0
- package/src/utils/link-handler.js +25 -0
- package/src/utils/mdast-to-vdom.js +323 -0
- package/src/utils/mdast-util-gfm-nolink.js +93 -0
- package/src/utils/path.js +103 -0
- package/src/utils/remark-gfm-nolink.js +128 -0
- package/src/utils/section-handler.js +69 -0
- package/src/utils/table-handler.js +27 -0
package/src/json-pipe.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import fetchMetadata from './steps/fetch-metadata.js';
|
|
13
|
+
import setCustomResponseHeaders from './steps/set-custom-response-headers.js';
|
|
14
|
+
import { PipelineResponse } from './PipelineResponse.js';
|
|
15
|
+
import jsonFilter from './utils/json-filter.js';
|
|
16
|
+
|
|
17
|
+
/**
 * Runs the JSON pipeline for the requested resource and returns the response.
 *
 * The resource is looked up in the content bus first and, if it is missing there,
 * in the code bus. The loaded data is passed through the JSON filter using the
 * `limit`, `offset` and `sheet` query parameters of the request.
 *
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @returns {PipelineResponse}
 */
export async function jsonPipe(state, req) {
  const {
    log, owner, repo, ref, contentBusId, partition, s3Loader,
  } = state;
  const resourcePath = state.info.path;

  // collect the query parameters; `sheet` may be given several times
  const query = req.url.searchParams;
  const params = Object.fromEntries(query.entries());
  if (params.sheet) {
    params.sheet = query.getAll('sheet');
  }
  const { limit, offset, sheet } = params;

  if (!resourcePath.endsWith('.json')) {
    log.error('only json resources supported.');
    return new PipelineResponse('', {
      status: 400,
      headers: {
        'x-error': 'only json resources supported.',
      },
    });
  }

  // content bus first, code bus as fallback for missing resources
  state.timer?.update('json-fetch');
  let dataResponse = await s3Loader.getObject('helix-content-bus', `${contentBusId}/${partition}${resourcePath}`);
  if (dataResponse.status === 404) {
    dataResponse = await s3Loader.getObject('helix-code-bus', `${owner}/${repo}/${ref}${resourcePath}`);
  }

  // anything but a 200 is handed back to the caller unchanged
  if (dataResponse.status !== 200) {
    return dataResponse;
  }

  // filter data; the data is passed "raw" when no filter parameter was given at all
  const toInteger = (value) => (value ? Number.parseInt(value, 10) : undefined);
  const isRaw = [limit, offset, sheet].every((value) => value === undefined);
  const response = jsonFilter(state, dataResponse.body, {
    limit: toInteger(limit),
    offset: toInteger(offset),
    sheet,
    raw: isRaw,
  });

  // last-modified comes from the data only (metadata / helix-config.json do not influence it)
  const lastModified = dataResponse.headers.get('last-modified');
  if (lastModified) {
    response.headers.set('last-modified', lastModified);
  }

  // surrogate key: content bus id + path, with slashes replaced by underscores
  const surrogateKey = `${contentBusId}${resourcePath}`.replace(/\//g, '_');
  response.headers.set('x-surrogate-key', surrogateKey);

  // load metadata.json and apply the custom response headers
  await fetchMetadata(state, req, response);
  await setCustomResponseHeaders(state, req, response);

  return response;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Adds missing `id` attributes to the headings (`h1` to `h6`) of the document.
 * Headings that already have an id, or that have no text, are left untouched.
 * The ids are generated by `content.slugger` from the trimmed heading text.
 * @type PipelineStep
 * @param {PipelineContent} content The current context of processing pipeline
 */
export default async function fixSections({ content }) {
  const { slugger, document } = content;
  const headingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

  // headings are processed level by level, so the slugger sees all h1 before any h2, etc.
  for (const tagName of headingTags) {
    for (const $heading of document.querySelectorAll(tagName)) {
      const label = $heading.id ? '' : $heading.textContent.trim();
      if (label) {
        $heading.setAttribute('id', slugger.slug(label));
      }
    }
  }
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { toClassName } from './utils.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Creates a "DIV representation" of a table.
 *
 * The first row of the table is the header row: the class names derived from its
 * cells are joined with `-` and set as class of the resulting div. Every other row
 * becomes a child div that contains one div per cell, holding the cell's child nodes.
 *
 * @param {Document} document
 * @param {HTMLTableElement} $table the table element
 * @returns {HTMLElement} the resulting div, or the nested table for the special case
 *                        of a single-cell table that wraps another table
 */
function tableToDivs(document, $table) {
  const $block = document.createElement('div');

  // walk thead/tbodies directly to avoid problems with query selector and nested tables
  const $allRows = [
    ...($table.tHead ? $table.tHead.rows : []),
    ...Array.from($table.tBodies).flatMap(($tbody) => [...$tbody.rows]),
  ];
  if ($allRows.length === 0) {
    return $block;
  }
  const [$headerRow, ...$contentRows] = $allRows;

  // special case, only 1 row and 1 column with a nested table
  if ($contentRows.length === 0 && $headerRow.cells.length === 1) {
    const $nestedTable = $headerRow.cells[0].querySelector(':scope table');
    if ($nestedTable) {
      return $nestedTable;
    }
  }

  // the block name is built from the (non-empty) column names of the header row
  const blockName = Array.from($headerRow.cells, ($cell) => toClassName($cell.textContent))
    .filter((name) => !!name)
    .join('-');
  if (blockName) {
    $block.classList.add(blockName);
  }

  // construct page block: one div per row, one nested div per cell
  $contentRows.forEach(($row) => {
    const $rowDiv = document.createElement('div');
    Array.from($row.cells).forEach(($cell) => {
      const $cellDiv = document.createElement('div');
      $cellDiv.append(...$cell.childNodes);
      $rowDiv.append($cellDiv);
    });
    $block.append($rowDiv);
  });
  return $block;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Converts tables into page blocks: every table that is a direct child of a
 * top-level section div is replaced with its div representation.
 * see https://github.com/adobe/helix-pages/issues/638
 * @param context The current context of processing pipeline
 */
export default function createPageBlocks({ content }) {
  const { document } = content;
  const $tables = document.querySelectorAll('body > div > table');
  for (const $table of $tables) {
    const $replacement = tableToDivs(document, $table);
    $table.parentNode.replaceChild($replacement, $table);
  }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { optimizeImageURL } from './utils.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Converts imgs to pictures: every `<img>` whose `src` starts with `./media_` is
 * wrapped in a `<picture>` element that also carries a `<source>` for narrow viewports.
 * @type PipelineStep
 * @param context The current context of processing pipeline
 */
export default async function createPictures({ content }) {
  const { document } = content;
  const $images = document.querySelectorAll('img[src^="./media_"]');

  $images.forEach(($img, index) => {
    const originalSrc = $img.getAttribute('src');

    // <source> with a smaller rendition for small screens
    const $source = document.createElement('source');
    $source.setAttribute('media', '(max-width: 400px)');
    $source.setAttribute('srcset', optimizeImageURL(originalSrc, 750));

    const $picture = document.createElement('picture');
    $picture.appendChild($source);

    // load all but first image lazy
    const loading = index > 0 ? 'lazy' : 'eager';
    $img.setAttribute('loading', loading);
    $img.setAttribute('src', optimizeImageURL(originalSrc, 2000));

    // put the picture where the image was, then move the image into it
    $img.parentNode.insertBefore($picture, $img);
    $picture.appendChild($img);
  });
}
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { resolve } from 'url';
|
|
13
|
+
import { getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL } from './utils.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Creates a meta name from the given text: the text is lower-cased and every
 * character that is not a digit, a letter, `:` or `_` is replaced with `-`.
 * @param {string} text input text
 * @returns {string} the meta name
 */
function toMetaName(text) {
  const lowerCased = text.toLowerCase();
  return lowerCased.replace(/[^0-9a-z:_]/gi, '-');
}
|
|
25
|
+
|
|
26
|
+
/**
 * Splits a comma-separated string into its entries. Every entry is trimmed and
 * empty entries are dropped.
 * @param {string} list A comma-separated list
 * @returns {string[]} The clean list
 */
function toList(list) {
  const entries = [];
  for (const raw of list.split(',')) {
    const entry = raw.trim();
    if (entry) {
      entries.push(entry);
    }
  }
  return entries;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reads the configuration of a block element and returns it as key/value pairs.
 *
 * Every direct child div of the block is a row; the first cell of a row provides
 * the name (converted with `toMetaName`), the second cell provides the value.
 * The value is taken from the first source that yields a non-empty result:
 * 1. the texts of multiple paragraphs or list items, joined with `, `
 * 2. the trimmed text content of the cell (3 consecutive spaces become a comma)
 * 3. the `href` of the first link in the cell
 * 4. the `src` of the first image in the cell
 * Rows without a name or without a value are ignored.
 *
 * @param {HTMLDivElement} $block The block element
 * @returns {object} The block config
 */
function readBlockConfig($block) {
  const config = {};

  $block.querySelectorAll(':scope>div').forEach(($row) => {
    const $cells = $row.children;
    if (!$cells || !$cells[1]) {
      return;
    }
    const name = toMetaName($cells[0].textContent);
    if (!name) {
      return;
    }
    const $valueCell = $cells[1];

    let value;
    if ($valueCell.hasChildNodes() && $valueCell.firstElementChild) {
      // check for multiple paragraphs or a list
      const { tagName } = $valueCell.firstElementChild;
      let items;
      if (tagName === 'P') {
        // contains a list of <p> paragraphs
        items = $valueCell.childNodes;
      } else if (tagName === 'UL' || tagName === 'OL') {
        // contains a list
        items = $valueCell.children[0].childNodes;
      }
      if (items) {
        value = Array.from(items, (item) => `${item.textContent}`).join(', ');
      }
    }

    // for text content only
    value = value || $valueCell.textContent.trim().replace(/ {3}/g, ',');

    if (!value) {
      // check for value inside link
      const $link = $valueCell.querySelector('a');
      if ($link) {
        value = $link.getAttribute('href');
      }
    }

    if (!value) {
      // check for value inside img
      const $image = $valueCell.querySelector('img');
      if ($image) {
        value = $image.getAttribute('src');
      }
    }

    // only keep non-empty value
    if (value) {
      config[name] = value;
    }
  });

  return config;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Copies the truthy values of a metadata rule to the target object. The keys are
 * converted with `toMetaName`; the `url` key (the rule's matcher) is not copied.
 * @param {object} target The object that receives the metadata
 * @param {object} obj The metadata rule
 */
function applyMetaRule(target, obj) {
  for (const [key, value] of Object.entries(obj)) {
    const metaKey = toMetaName(key);
    if (value && metaKey !== 'url') {
      target[metaKey] = value;
    }
  }
}
|
|
109
|
+
|
|
110
|
+
/**
 * Converts a URL glob pattern to an anchored regular expression.
 * - `**` matches any sequence of characters (including `/`)
 * - `*` matches any sequence of digits, lower case letters, `-` and `.`
 *
 * Both wildcards are translated in a single pass. Using a temporary placeholder
 * character for `**` would also rewrite literal occurrences of that character
 * (e.g. the `_` in `/my_docs/*`) and make such globs match unrelated paths.
 *
 * All other characters are copied to the expression unchanged, i.e. they are
 * not escaped.
 *
 * @param {string} glob The glob pattern
 * @returns {RegExp} The regular expression matching the entire path
 */
function globToRegExp(glob) {
  const reString = glob.replace(/\*\*|\*/g, (wildcard) => (
    wildcard === '**' ? '.*' : '[0-9a-z-.]*'
  ));
  return new RegExp(`^${reString}$`);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Computes the global metadata for a path by applying all matching metadata rules
 * in order. A rule matches if its `url` (also `URL` or `Url`) property is either
 * equal to the path or, when it contains a `*`, a glob that matches the path.
 * @param {object[]} metaRules The metadata rules
 * @param {string} path The path of the requested resource
 * @returns {object} The metadata for the path
 */
export function filterGlobalMetadata(metaRules, path) {
  return metaRules.reduce((metaConfig, rule) => {
    const glob = rule.url || rule.URL || rule.Url;
    const isUsable = glob && typeof glob === 'string' && /[0-9a-z-/*]/.test(glob);
    if (isUsable) {
      const isMatch = glob.indexOf('*') >= 0
        ? globToRegExp(glob).test(path)
        : glob === path;
      if (isMatch) {
        applyMetaRule(metaConfig, rule);
      }
    }
    return metaConfig;
  }, {});
}
|
|
134
|
+
|
|
135
|
+
/**
 * Looks for metadata in the document. If the document contains a metadata block
 * (`div.metadata`), its config is read and the block is removed from the document.
 * @param {HTMLDocument} document The document
 * @return {object} The metadata; an empty object if there is no metadata block
 */
function getLocalMetadata(document) {
  const $metaBlock = document.querySelector('body div.metadata');
  if (!$metaBlock) {
    return {};
  }
  const metaConfig = readBlockConfig($metaBlock);
  $metaBlock.remove();
  return metaConfig;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Adds image optimization parameters suitable for meta images to a URL.
 * The image URL is resolved against the page path first; only URLs that resolve
 * to a path (starting with `/`) are optimized, all others are returned as resolved.
 * @param {string} pagePath The path of the requested page
 * @param {string} imgUrl The image URL
 * @returns The optimized image URL
 */
function optimizeMetaImage(pagePath, imgUrl) {
  const src = resolve(pagePath, imgUrl);
  return src.startsWith('/')
    ? optimizeImageURL(src, 1200, 'pjpg')
    : src;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Extracts the metadata and stores it in the content meta.
 *
 * The global metadata (from `state.metadata`) that matches the request path is
 * overlaid with the local metadata of the document's metadata block. Supported
 * properties are stored directly on `content.meta`, all others are collected in
 * `meta.custom`. Missing title, description, canonical and image information is
 * completed with insights from the document.
 *
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 */
export default function extractMetaData(state, req) {
  const { content } = state;
  const { meta, document } = content;

  // global metadata from the spreadsheet first, local metadata from the document wins
  const globalMeta = filterGlobalMetadata(state.metadata, state.info.path);
  const metaConfig = Object.assign(globalMeta, getLocalMetadata(document));

  // move the supported metadata properties over to meta
  const supportedNames = [
    'title',
    'description',
    'keywords',
    'tags',
    'image',
    'image-alt',
    'canonical',
    'feed',
  ];
  for (const name of supportedNames) {
    if (metaConfig[name]) {
      meta[name] = metaConfig[name];
      delete metaConfig[name];
    }
  }

  // whatever is left ends up in meta.custom
  const customNames = Object.keys(metaConfig);
  if (customNames.length > 0) {
    meta.custom = customNames.map((name) => ({
      name,
      value: metaConfig[name],
      property: name.includes(':'),
    }));
  }

  if (meta.keywords) {
    meta.keywords = toList(meta.keywords).join(', ');
  }
  if (meta.tags) {
    meta.tags = toList(meta.tags);
  }

  // complete metadata with insights from content
  if (!meta.title) {
    // content.title is not correct if the h1 is in a page-block since the pipeline
    // only respects the heading nodes in the mdast
    const $title = document.querySelector('body > div h1');
    if ($title) {
      content.title = $title.textContent;
    }
    meta.title = content.title;
  }

  if (!meta.description) {
    // description: text of the first paragraph with 10 or more words
    // (or with a word longer than 25 characters that is not a link)
    let words = [];
    for (const $p of document.querySelectorAll('div > p')) {
      const candidate = $p.textContent.trim().split(/\s+/);
      const isLongWord = (w) => w.length > 25 && !w.startsWith('http');
      if (candidate.length >= 10 || candidate.some(isLongWord)) {
        words = candidate;
        break;
      }
    }
    const ellipsis = words.length > 25 ? ' ...' : '';
    meta.description = `${words.slice(0, 25).join(' ')}${ellipsis}`;
  }

  meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.href));
  if (!meta.canonical) {
    meta.canonical = meta.url;
  }

  // content.image is not correct if the first image is in a page-block. since the pipeline
  // only respects the image nodes in the mdast
  const $hero = document.querySelector('body > div img');
  if ($hero) {
    content.image = $hero.src;
    if ($hero.alt) {
      content.imageAlt = $hero.alt;
    }
  }

  const imageUrl = meta.image || content.image || '/default-meta-image.png';
  meta.image = getAbsoluteUrl(req.headers, optimizeMetaImage(state.info.path, imageUrl));

  meta.imageAlt = meta['image-alt'] || content.imageAlt;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
|
|
13
|
+
import { PipelineStatusError } from '../PipelineStatusError.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Fetches the helix-config.json from the code-bus and stores it in `state.helixConfig`.
 * Also updates the last-modified information with the one of the config.
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 * @throws {PipelineStatusError} 404 if the config does not exist, 502 if it cannot be
 *         loaded for another reason, and 400 if it is not valid JSON
 */
export default async function fetchConfig(state, req, res) {
  const {
    log, owner, repo, ref,
  } = state;

  const ret = await state.s3Loader.getObject('helix-code-bus', `${owner}/${repo}/${ref}/helix-config.json`);
  if (ret.status !== 200) {
    // keep 404, but report all other failures as 502
    const status = ret.status === 404 ? 404 : 502;
    throw new PipelineStatusError(status, `unable to load /helix-config.json: ${ret.status}`);
  }

  let helixConfig;
  try {
    helixConfig = JSON.parse(ret.body);
  } catch (e) {
    log.info('failed to parse helix-config.json', e);
    throw new PipelineStatusError(400, `Failed parsing of /helix-config.json: ${e.message}`);
  }
  state.helixConfig = helixConfig;

  // also update last-modified
  updateLastModified(state, res, extractLastModified(ret.headers));
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2022 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Loads the content from either the content-bus or code-bus and stores it in
 * `state.content.data`.
 *
 * - If the loaded object carries a redirect location, the response is turned into
 *   a 301 and nothing else is done.
 * - A missing resource results in a 404, every other load failure in a 502.
 * - Requests to `/index` are rejected with a 404 even if the resource exists.
 * - For every 404, the `404.html` of the code-bus is used as response body if it exists.
 *
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 */
export default async function fetchContent(state, req, res) {
  const {
    log, contentBusId, info, partition, owner, repo, ref,
  } = state;

  // `info.resourcePath` already starts with a slash (the content-bus key relies on it),
  // so no extra separator must be added for the code-bus key either
  const isCode = state.content.sourceBus === 'code';
  const key = isCode
    ? `${owner}/${repo}/${ref}${info.resourcePath}`
    : `${contentBusId}/${partition}${info.resourcePath}`;
  const bucketId = isCode ? 'helix-code-bus' : 'helix-content-bus';

  const ret = await state.s3Loader.getObject(bucketId, key);

  // check for redirect
  const redirectLocation = ret.headers.get('x-amz-meta-redirect-location');
  if (redirectLocation) {
    res.status = 301;
    res.body = '';
    res.headers.set('location', redirectLocation);
    res.error = 'moved';
    return;
  }

  if (ret.status === 200) {
    state.content.data = ret.body;

    // store extra source location if present
    state.content.sourceLocation = ret.headers.get('x-amz-meta-x-source-location');
    log.info(`source-location: ${state.content.sourceLocation}`);

    updateLastModified(state, res, extractLastModified(ret.headers));

    // reject requests to /index *after* checking for redirects
    // (https://github.com/adobe/helix-pipeline-service/issues/290)
    if (state.info.originalFilename === 'index') {
      res.status = 404;
      res.error = `request to ${info.path} not allowed (no-index).`;
    }
  } else {
    // keep 404, but propagate others as 502
    res.status = ret.status === 404 ? 404 : 502;
    res.error = `failed to load ${info.path} from ${state.content.sourceBus}-bus: ${ret.status}`;
  }

  if (res.status === 404) {
    // try to load 404.html from code-bus
    const ret404 = await state.s3Loader.getObject('helix-code-bus', `${owner}/${repo}/${ref}/404.html`);
    if (ret404.status === 200) {
      // prefer the source-last-modified of the 404.html if it is set
      const lastModified = extractLastModified(ret404.headers)
        || ret404.headers.get('last-modified');

      // keep 404 response status, but respond with the body of the 404.html. the body of
      // the originally requested resource must not be used here: it is either the body of
      // the failed load or the content of a resource that was just rejected (no-index).
      res.body = ret404.body;
      res.headers.set('last-modified', lastModified);
      res.headers.set('content-type', 'text/html; charset=utf-8');
      res.headers.set('x-surrogate-key', `${ref}--${repo}--${owner}_404`);
    }
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { PipelineStatusError } from '../PipelineStatusError.js';
|
|
14
|
+
import { extractLastModified, updateLastModified } from '../utils/last-modified.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Loads the metadata.json from the content-bus and stores it in `state.metadata`.
 *
 * The metadata is read from the `data` array of the `default` sheet or, if there is
 * no `default` property, from the `data` array of the JSON itself. A missing
 * metadata.json is not an error and results in empty metadata.
 *
 * @type PipelineStep
 * @param {PipelineState} state
 * @param {PipelineRequest} req
 * @param {PipelineResponse} res
 * @returns {Promise<void>}
 * @throws {PipelineStatusError} 400 if the metadata.json cannot be parsed or does not
 *         provide a `data` array, 502 if it cannot be loaded
 */
export default async function fetchMetadata(state, req, res) {
  const { contentBusId, partition } = state;
  const key = `${contentBusId}/${partition}/metadata.json`;
  const ret = await state.s3Loader.getObject('helix-content-bus', key);
  if (ret.status === 200) {
    let json;
    try {
      json = JSON.parse(ret.body);
    } catch (e) {
      throw new PipelineStatusError(400, `failed parsing of /metadata.json: ${e.message}`);
    }

    // `null` is valid JSON: guard the property access so that it is reported as
    // invalid metadata (400) below instead of failing with a TypeError
    const { data } = json?.default ?? json ?? {};
    if (!Array.isArray(data)) {
      throw new PipelineStatusError(400, 'failed loading of /metadata.json: data must be an array');
    }
    state.metadata = data;

    // also update last-modified
    updateLastModified(state, res, extractLastModified(ret.headers));
    return;
  }

  if (ret.status !== 404) {
    throw new PipelineStatusError(502, `failed to load /metadata.json: ${ret.status}`);
  }

  // ignore 404
  state.metadata = [];
}
|