@adobe/helix-html-pipeline 1.1.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/package.json +17 -15
- package/src/PipelineContent.d.ts +8 -7
- package/src/PipelineContent.js +2 -0
- package/src/PipelineResponse.d.ts +6 -1
- package/src/html-pipe.js +7 -0
- package/src/steps/add-heading-ids.js +14 -13
- package/src/steps/create-page-blocks.js +28 -27
- package/src/steps/create-pictures.js +16 -12
- package/src/steps/extract-metadata.js +61 -44
- package/src/steps/fix-sections.js +8 -9
- package/src/steps/get-metadata.js +5 -4
- package/src/steps/make-html.js +3 -14
- package/src/steps/removeHlxProps.js +9 -10
- package/src/steps/render.js +68 -116
- package/src/steps/rewrite-blob-images.js +6 -24
- package/src/steps/rewrite-icons.js +30 -44
- package/src/steps/stringify-response.js +11 -11
- package/src/steps/utils.js +26 -4
- package/src/utils/{table-handler.js → hast-utils.js} +13 -15
- package/src/utils/heading-handler.js +11 -24
- package/src/utils/mdast-to-hast.js +60 -0
- package/src/utils/path.js +4 -1
- package/src/utils/section-handler.js +6 -4
- package/src/utils/hast-util-to-dom.js +0 -190
- package/src/utils/icon-handler.js +0 -40
- package/src/utils/link-handler.js +0 -25
- package/src/utils/mdast-to-vdom.js +0 -323
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,24 @@
|
|
|
1
|
+
## [1.2.1](https://github.com/adobe/helix-html-pipeline/compare/v1.2.0...v1.2.1) (2022-03-16)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* reject double-slashes ([#22](https://github.com/adobe/helix-html-pipeline/issues/22)) ([5aee75d](https://github.com/adobe/helix-html-pipeline/commit/5aee75d4109550525d971c64d87e4f2420863c30)), closes [#20](https://github.com/adobe/helix-html-pipeline/issues/20)
|
|
7
|
+
|
|
8
|
+
# [1.2.0](https://github.com/adobe/helix-html-pipeline/compare/v1.1.3...v1.2.0) (2022-03-16)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* use hast instead of jsdom ([#12](https://github.com/adobe/helix-html-pipeline/issues/12)) ([bee0a0b](https://github.com/adobe/helix-html-pipeline/commit/bee0a0b3309919f896520bc700dd2d867be19a1c)), closes [#11](https://github.com/adobe/helix-html-pipeline/issues/11)
|
|
14
|
+
|
|
15
|
+
## [1.1.3](https://github.com/adobe/helix-html-pipeline/compare/v1.1.2...v1.1.3) (2022-03-12)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Bug Fixes
|
|
19
|
+
|
|
20
|
+
* **deps:** update dependency @adobe/helix-shared-utils to v2.0.5 ([4ea15f9](https://github.com/adobe/helix-html-pipeline/commit/4ea15f9888486ba0e81e92c7796236726da5b74c))
|
|
21
|
+
|
|
1
22
|
## [1.1.2](https://github.com/adobe/helix-html-pipeline/compare/v1.1.1...v1.1.2) (2022-03-11)
|
|
2
23
|
|
|
3
24
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/helix-html-pipeline",
|
|
3
|
-
"version": "1.1
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "Helix HTML Pipeline",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -33,10 +33,13 @@
|
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"@adobe/helix-markdown-support": "3.1.2",
|
|
36
|
-
"@adobe/helix-shared-utils": "2.0.
|
|
36
|
+
"@adobe/helix-shared-utils": "2.0.5",
|
|
37
37
|
"github-slugger": "1.4.0",
|
|
38
|
+
"hast-util-raw": "7.2.1",
|
|
39
|
+
"hast-util-select": "5.0.1",
|
|
38
40
|
"hast-util-to-html": "8.0.3",
|
|
39
|
-
"
|
|
41
|
+
"hast-util-to-string": "2.0.0",
|
|
42
|
+
"hastscript": "7.0.2",
|
|
40
43
|
"mdast-util-gfm-footnote": "1.0.1",
|
|
41
44
|
"mdast-util-gfm-strikethrough": "1.0.1",
|
|
42
45
|
"mdast-util-gfm-table": "1.0.3",
|
|
@@ -50,19 +53,21 @@
|
|
|
50
53
|
"micromark-extension-gfm-task-list-item": "1.0.3",
|
|
51
54
|
"micromark-util-combine-extensions": "1.0.0",
|
|
52
55
|
"mime": "3.0.0",
|
|
53
|
-
"
|
|
56
|
+
"rehype-format": "4.0.1",
|
|
57
|
+
"rehype-minify-whitespace": "5.0.0",
|
|
58
|
+
"rehype-parse": "8.0.4",
|
|
54
59
|
"remark-parse": "10.0.1",
|
|
55
60
|
"strip-markdown": "5.0.0",
|
|
56
|
-
"unified": "10.1.
|
|
61
|
+
"unified": "10.1.2",
|
|
57
62
|
"unist-util-map": "3.0.0",
|
|
63
|
+
"unist-util-remove": "3.1.0",
|
|
58
64
|
"unist-util-remove-position": "4.0.1",
|
|
59
65
|
"unist-util-select": "4.0.1",
|
|
60
|
-
"unist-util-visit": "4.1.0"
|
|
61
|
-
"uri-js": "4.4.1"
|
|
66
|
+
"unist-util-visit": "4.1.0"
|
|
62
67
|
},
|
|
63
68
|
"devDependencies": {
|
|
64
69
|
"@adobe/eslint-config-helix": "1.3.2",
|
|
65
|
-
"@markedjs/html-differ": "4.0.
|
|
70
|
+
"@markedjs/html-differ": "4.0.1",
|
|
66
71
|
"@semantic-release/changelog": "6.0.1",
|
|
67
72
|
"@semantic-release/git": "10.0.1",
|
|
68
73
|
"@semantic-release/npm": "9.0.1",
|
|
@@ -71,23 +76,20 @@
|
|
|
71
76
|
"codecov": "3.8.3",
|
|
72
77
|
"commitizen": "4.2.4",
|
|
73
78
|
"cz-conventional-changelog": "3.3.0",
|
|
74
|
-
"eslint": "8.
|
|
79
|
+
"eslint": "8.11.0",
|
|
75
80
|
"eslint-plugin-header": "3.1.1",
|
|
76
81
|
"eslint-plugin-import": "2.25.4",
|
|
77
82
|
"esmock": "1.7.4",
|
|
78
|
-
"hastscript": "7.0.2",
|
|
79
83
|
"husky": "7.0.4",
|
|
80
|
-
"hyperscript": "2.0.2",
|
|
81
84
|
"js-yaml": "4.1.0",
|
|
82
85
|
"jsdoc-to-markdown": "7.1.1",
|
|
86
|
+
"jsdom": "19.0.0",
|
|
83
87
|
"junit-report-builder": "3.0.0",
|
|
84
88
|
"lint-staged": "12.3.5",
|
|
85
|
-
"mocha": "9.2.
|
|
89
|
+
"mocha": "9.2.2",
|
|
86
90
|
"mocha-multi-reporters": "1.5.1",
|
|
87
91
|
"remark-gfm": "3.0.1",
|
|
88
|
-
"semantic-release": "19.0.2"
|
|
89
|
-
"sinon": "13.0.1",
|
|
90
|
-
"unist-builder": "3.0.0"
|
|
92
|
+
"semantic-release": "19.0.2"
|
|
91
93
|
},
|
|
92
94
|
"lint-staged": {
|
|
93
95
|
"*.js": "eslint",
|
package/src/PipelineContent.d.ts
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import {Node} from "unist";
|
|
13
13
|
import GithubSlugger from 'github-slugger';
|
|
14
|
+
import { Root } from 'hast';
|
|
14
15
|
|
|
15
16
|
declare enum SourceType {
|
|
16
17
|
CONTENT = 'content',
|
|
@@ -50,12 +51,9 @@ declare class PipelineContent {
|
|
|
50
51
|
mdast: Node;
|
|
51
52
|
|
|
52
53
|
/**
|
|
53
|
-
* document
|
|
54
|
+
* The transformed document (hast) representation
|
|
54
55
|
*/
|
|
55
|
-
|
|
56
|
-
title: string;
|
|
57
|
-
intro: string;
|
|
58
|
-
image: string;
|
|
56
|
+
hast: Root;
|
|
59
57
|
|
|
60
58
|
/**
|
|
61
59
|
* slugger to use for heading id calculations
|
|
@@ -63,7 +61,10 @@ declare class PipelineContent {
|
|
|
63
61
|
slugger: GithubSlugger;
|
|
64
62
|
|
|
65
63
|
/**
|
|
66
|
-
*
|
|
64
|
+
* document specific metadata
|
|
67
65
|
*/
|
|
68
|
-
|
|
66
|
+
meta: object;
|
|
67
|
+
title: string;
|
|
68
|
+
intro: string;
|
|
69
|
+
image: string;
|
|
69
70
|
}
|
package/src/PipelineContent.js
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import GithubSlugger from 'github-slugger';
|
|
12
13
|
|
|
13
14
|
/**
|
|
14
15
|
* State of the pipeline
|
|
@@ -21,6 +22,7 @@ export class PipelineContent {
|
|
|
21
22
|
constructor() {
|
|
22
23
|
Object.assign(this, {
|
|
23
24
|
sourceBus: 'content',
|
|
25
|
+
slugger: new GithubSlugger(),
|
|
24
26
|
});
|
|
25
27
|
}
|
|
26
28
|
}
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* OF ANY KIND; either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { Element } from 'hast';
|
|
13
|
+
|
|
12
14
|
declare interface PipelineResponseInit {
|
|
13
15
|
status?: number;
|
|
14
16
|
headers: Map<string, string> | object;
|
|
@@ -17,7 +19,10 @@ declare interface PipelineResponseInit {
|
|
|
17
19
|
declare class PipelineResponse {
|
|
18
20
|
constructor(body?:string, init?:PipelineResponseInit);
|
|
19
21
|
status: number;
|
|
20
|
-
|
|
22
|
+
/**
|
|
23
|
+
* The transformed document (hast) representation
|
|
24
|
+
*/
|
|
25
|
+
document: Element;
|
|
21
26
|
body: string;
|
|
22
27
|
headers: Map<string, string>;
|
|
23
28
|
error: any;
|
package/src/html-pipe.js
CHANGED
|
@@ -62,11 +62,14 @@ export async function htmlPipe(state, req) {
|
|
|
62
62
|
});
|
|
63
63
|
|
|
64
64
|
try { // fetch config first, since we need to compute the content-bus-id from the fstab ...
|
|
65
|
+
state.timer?.update('config-fetch');
|
|
65
66
|
await fetchConfig(state, req, res);
|
|
67
|
+
|
|
66
68
|
// ...and apply the folder mapping
|
|
67
69
|
await folderMapping(state, req, res);
|
|
68
70
|
|
|
69
71
|
// load metadata and content in parallel
|
|
72
|
+
state.timer?.update('content-fetch');
|
|
70
73
|
await Promise.all([
|
|
71
74
|
fetchMetadata(state, req, res),
|
|
72
75
|
fetchContent(state, req, res),
|
|
@@ -80,9 +83,12 @@ export async function htmlPipe(state, req) {
|
|
|
80
83
|
}
|
|
81
84
|
|
|
82
85
|
if (state.content.sourceBus === 'code') {
|
|
86
|
+
state.timer?.update('serialize');
|
|
83
87
|
await renderCode(state, req, res);
|
|
84
88
|
} else {
|
|
89
|
+
state.timer?.update('parse');
|
|
85
90
|
await parseMarkdown(state);
|
|
91
|
+
state.timer?.update('render');
|
|
86
92
|
await splitSections(state);
|
|
87
93
|
await getMetadata(state); // this one extracts the metadata from the mdast
|
|
88
94
|
await unwrapSoleImages(state);
|
|
@@ -96,6 +102,7 @@ export async function htmlPipe(state, req) {
|
|
|
96
102
|
await addHeadingIds(state);
|
|
97
103
|
await render(state, req, res);
|
|
98
104
|
await removeHlxProps(state, req, res);
|
|
105
|
+
state.timer?.update('serialize');
|
|
99
106
|
await tohtml(state, req, res);
|
|
100
107
|
}
|
|
101
108
|
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { toString } from 'hast-util-to-string';
|
|
13
|
+
import { visit } from 'unist-util-visit';
|
|
12
14
|
|
|
13
15
|
/**
|
|
14
16
|
* Adds missing `id` attributes to the headings
|
|
@@ -16,17 +18,16 @@
|
|
|
16
18
|
* @param {PipelineContent } content The current context of processing pipeline
|
|
17
19
|
*/
|
|
18
20
|
export default async function fixSections({ content }) {
|
|
19
|
-
const { slugger,
|
|
20
|
-
|
|
21
|
-
.
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
});
|
|
21
|
+
const { slugger, hast } = content;
|
|
22
|
+
visit(hast, (node) => {
|
|
23
|
+
if (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(node.tagName)) {
|
|
24
|
+
const { properties } = node;
|
|
25
|
+
if (!properties.id) {
|
|
26
|
+
const text = toString(node).trim();
|
|
27
|
+
if (text) {
|
|
28
|
+
properties.id = slugger.slug(text);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
});
|
|
32
33
|
}
|
|
@@ -9,57 +9,57 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { h } from 'hastscript';
|
|
13
|
+
import { selectAll, select } from 'hast-util-select';
|
|
14
|
+
import { toString } from 'hast-util-to-string';
|
|
12
15
|
import { toClassName } from './utils.js';
|
|
16
|
+
import { replace, childNodes } from '../utils/hast-utils.js';
|
|
13
17
|
|
|
14
18
|
/**
|
|
15
19
|
* Creates a "DIV representation" of a table.
|
|
16
20
|
* @type PipelineStep
|
|
17
|
-
* @param {
|
|
18
|
-
* @param {HTMLTableElement} $table the table element
|
|
21
|
+
* @param {HTMLTableElement} $table the table element
|
|
19
22
|
* @returns {HTMLDivElement} the resulting div
|
|
20
23
|
*/
|
|
21
|
-
function tableToDivs(
|
|
22
|
-
const $cards =
|
|
23
|
-
|
|
24
|
-
// iterate over the table to avoid problem with query selector and nested tables
|
|
24
|
+
function tableToDivs($table) {
|
|
25
|
+
const $cards = h('div');
|
|
25
26
|
const $rows = [];
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
$rows.push(...$tbody.rows);
|
|
27
|
+
for (const child of $table.children) {
|
|
28
|
+
if (child.tagName === 'thead' || child.tagName === 'tbody') {
|
|
29
|
+
$rows.push(...childNodes(child));
|
|
30
|
+
}
|
|
31
31
|
}
|
|
32
|
+
|
|
32
33
|
if ($rows.length === 0) {
|
|
33
34
|
return $cards;
|
|
34
35
|
}
|
|
35
|
-
const $
|
|
36
|
+
const $headerCols = childNodes($rows.shift());
|
|
36
37
|
|
|
37
38
|
// special case, only 1 row and 1 column with a nested table
|
|
38
|
-
if ($rows.length === 0 && $
|
|
39
|
-
const $nestedTable =
|
|
39
|
+
if ($rows.length === 0 && $headerCols.length === 1) {
|
|
40
|
+
const $nestedTable = select(':scope table', $headerCols[0]);
|
|
40
41
|
if ($nestedTable) {
|
|
41
42
|
return $nestedTable;
|
|
42
43
|
}
|
|
43
44
|
}
|
|
44
45
|
|
|
45
46
|
// get columns names
|
|
46
|
-
const clazz =
|
|
47
|
-
.map((e) => toClassName(e
|
|
47
|
+
const clazz = $headerCols
|
|
48
|
+
.map((e) => toClassName(toString(e)))
|
|
48
49
|
.filter((c) => !!c)
|
|
49
50
|
.join('-');
|
|
50
51
|
if (clazz) {
|
|
51
|
-
$cards.
|
|
52
|
+
$cards.properties.className = [clazz];
|
|
52
53
|
}
|
|
53
54
|
|
|
54
55
|
// construct page block
|
|
55
56
|
for (const $row of $rows) {
|
|
56
|
-
const $card =
|
|
57
|
-
for (const $cell of $row
|
|
58
|
-
|
|
59
|
-
$
|
|
60
|
-
$card.append($div);
|
|
57
|
+
const $card = h('div');
|
|
58
|
+
for (const $cell of childNodes($row)) {
|
|
59
|
+
// convert to div
|
|
60
|
+
$card.children.push(h('div', $cell.children));
|
|
61
61
|
}
|
|
62
|
-
$cards.
|
|
62
|
+
$cards.children.push($card);
|
|
63
63
|
}
|
|
64
64
|
return $cards;
|
|
65
65
|
}
|
|
@@ -70,9 +70,10 @@ function tableToDivs(document, $table) {
|
|
|
70
70
|
* @param context The current context of processing pipeline
|
|
71
71
|
*/
|
|
72
72
|
export default function createPageBlocks({ content }) {
|
|
73
|
-
const {
|
|
74
|
-
|
|
75
|
-
const $div = tableToDivs(
|
|
76
|
-
|
|
73
|
+
const { hast } = content;
|
|
74
|
+
selectAll('div > table', hast).forEach(($table) => {
|
|
75
|
+
const $div = tableToDivs($table);
|
|
76
|
+
// replace child in parent
|
|
77
|
+
replace(hast, $table, $div);
|
|
77
78
|
});
|
|
78
79
|
}
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { h } from 'hastscript';
|
|
13
|
+
import { selectAll } from 'hast-util-select';
|
|
14
|
+
import { replace } from '../utils/hast-utils.js';
|
|
12
15
|
import { optimizeImageURL } from './utils.js';
|
|
13
16
|
|
|
14
17
|
/**
|
|
@@ -17,19 +20,20 @@ import { optimizeImageURL } from './utils.js';
|
|
|
17
20
|
* @param context The current context of processing pipeline
|
|
18
21
|
*/
|
|
19
22
|
export default async function createPictures({ content }) {
|
|
20
|
-
const {
|
|
23
|
+
const { hast } = content;
|
|
21
24
|
|
|
22
25
|
// transform <img> to <picture>
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
const source =
|
|
26
|
-
|
|
27
|
-
source.
|
|
28
|
-
|
|
29
|
-
picture
|
|
30
|
-
img.
|
|
31
|
-
img.
|
|
32
|
-
|
|
33
|
-
|
|
26
|
+
selectAll('img[src^="./media_"]', hast).forEach((img, i) => {
|
|
27
|
+
const { src } = img.properties;
|
|
28
|
+
const source = h('source');
|
|
29
|
+
source.properties.media = '(max-width: 400px)';
|
|
30
|
+
source.properties.srcset = optimizeImageURL(src, 750);
|
|
31
|
+
|
|
32
|
+
const picture = h('picture', source);
|
|
33
|
+
img.properties.loading = i > 0 ? 'lazy' : 'eager';
|
|
34
|
+
img.properties.src = optimizeImageURL(src, 2000);
|
|
35
|
+
|
|
36
|
+
replace(hast, img, picture);
|
|
37
|
+
picture.children.push(img);
|
|
34
38
|
});
|
|
35
39
|
}
|
|
@@ -9,10 +9,15 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { selectAll, select } from 'hast-util-select';
|
|
13
|
+
import { toString } from 'hast-util-to-string';
|
|
14
|
+
import { remove } from 'unist-util-remove';
|
|
15
|
+
import { visit, EXIT, CONTINUE } from 'unist-util-visit';
|
|
12
16
|
import {
|
|
13
17
|
getAbsoluteUrl, makeCanonicalHtmlUrl, optimizeImageURL, resolveUrl,
|
|
14
18
|
} from './utils.js';
|
|
15
19
|
import { filterGlobalMetadata, toMetaName, ALLOWED_RESPONSE_HEADERS } from '../utils/metadata.js';
|
|
20
|
+
import { childNodes } from '../utils/hast-utils.js';
|
|
16
21
|
|
|
17
22
|
/**
|
|
18
23
|
* Cleans up comma-separated string lists and returns an array.
|
|
@@ -28,55 +33,53 @@ function toList(list) {
|
|
|
28
33
|
|
|
29
34
|
/**
|
|
30
35
|
* Returns the config from a block element as object with key/value pairs.
|
|
31
|
-
* @param {
|
|
36
|
+
* @param {Element} $block The block element
|
|
32
37
|
* @returns {object} The block config
|
|
33
38
|
*/
|
|
34
39
|
function readBlockConfig($block) {
|
|
35
40
|
const config = {};
|
|
36
|
-
|
|
37
|
-
if ($row
|
|
38
|
-
const name =
|
|
41
|
+
selectAll(':scope>div', $block).forEach(($row) => {
|
|
42
|
+
if ($row?.children[1]) {
|
|
43
|
+
const [$name, $value] = $row.children;
|
|
44
|
+
const name = toMetaName(toString($name));
|
|
39
45
|
if (name) {
|
|
40
46
|
let value;
|
|
41
|
-
|
|
47
|
+
const $firstChild = childNodes($value)[0];
|
|
48
|
+
if ($firstChild) {
|
|
42
49
|
// check for multiple paragraph or a list
|
|
43
|
-
let
|
|
44
|
-
const { tagName } = $
|
|
45
|
-
if (tagName === '
|
|
50
|
+
let list;
|
|
51
|
+
const { tagName } = $firstChild;
|
|
52
|
+
if (tagName === 'p') {
|
|
46
53
|
// contains a list of <p> paragraphs
|
|
47
|
-
|
|
48
|
-
} else if (tagName === '
|
|
54
|
+
list = childNodes($value);
|
|
55
|
+
} else if (tagName === 'ul' || tagName === 'ol') {
|
|
49
56
|
// contains a list
|
|
50
|
-
|
|
57
|
+
list = childNodes($firstChild);
|
|
51
58
|
}
|
|
52
59
|
|
|
53
|
-
if (
|
|
54
|
-
value = '';
|
|
55
|
-
childNodes.forEach((child) => {
|
|
56
|
-
value += `${child.textContent}, `;
|
|
57
|
-
});
|
|
58
|
-
value = value.substring(0, value.length - 2);
|
|
60
|
+
if (list) {
|
|
61
|
+
value = list.map((child) => toString(child)).join(', ');
|
|
59
62
|
}
|
|
60
63
|
}
|
|
61
64
|
|
|
62
65
|
if (!value) {
|
|
63
66
|
// for text content only
|
|
64
|
-
value = $
|
|
67
|
+
value = toString($value).trim().replace(/ {3}/g, ',');
|
|
65
68
|
}
|
|
66
69
|
|
|
67
70
|
if (!value) {
|
|
68
71
|
// check for value inside link
|
|
69
|
-
const $a =
|
|
72
|
+
const $a = select('a', $value);
|
|
70
73
|
if ($a) {
|
|
71
|
-
value = $a.
|
|
74
|
+
value = $a.properties.href;
|
|
72
75
|
}
|
|
73
76
|
}
|
|
74
77
|
if (!value) {
|
|
75
78
|
// check for value inside img
|
|
76
|
-
const $img =
|
|
79
|
+
const $img = select('img', $value);
|
|
77
80
|
if ($img) {
|
|
78
81
|
// strip query string
|
|
79
|
-
value = $img.
|
|
82
|
+
value = $img.properties.src;
|
|
80
83
|
}
|
|
81
84
|
}
|
|
82
85
|
if (value) {
|
|
@@ -91,15 +94,17 @@ function readBlockConfig($block) {
|
|
|
91
94
|
|
|
92
95
|
/**
|
|
93
96
|
* Looks for metadata in the document.
|
|
94
|
-
* @param {
|
|
97
|
+
* @param {Root} document The hast document
|
|
95
98
|
* @return {object} The metadata
|
|
96
99
|
*/
|
|
97
100
|
function getLocalMetadata(document) {
|
|
98
101
|
let metaConfig = {};
|
|
99
|
-
const metaBlock =
|
|
102
|
+
const metaBlock = select('div.metadata', document);
|
|
100
103
|
if (metaBlock) {
|
|
101
104
|
metaConfig = readBlockConfig(metaBlock);
|
|
102
|
-
|
|
105
|
+
// TODO: here we should also remove the parent div of the former table, otherwise it results
|
|
106
|
+
// TODO: in an empty <div></div>
|
|
107
|
+
remove(document, { cascade: false }, metaBlock);
|
|
103
108
|
}
|
|
104
109
|
return metaConfig;
|
|
105
110
|
}
|
|
@@ -118,6 +123,27 @@ function optimizeMetaImage(pagePath, imgUrl) {
|
|
|
118
123
|
return src;
|
|
119
124
|
}
|
|
120
125
|
|
|
126
|
+
/**
|
|
127
|
+
* Extracts the description from the document. Note that the selectAll('div > p') used in
|
|
128
|
+
* jsdom doesn't work as expected in hast
|
|
129
|
+
* @param {Root} hast
|
|
130
|
+
* @see https://github.com/syntax-tree/unist/discussions/66
|
|
131
|
+
*/
|
|
132
|
+
function extractDescription(hast) {
|
|
133
|
+
let desc = '';
|
|
134
|
+
visit(hast, (node, idx, parent) => {
|
|
135
|
+
if (parent?.tagName === 'div' && node.tagName === 'p') {
|
|
136
|
+
const words = toString(node).trim().split(/\s+/);
|
|
137
|
+
if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
|
|
138
|
+
desc = `${words.slice(0, 25).join(' ')}${words.length > 25 ? ' ...' : ''}`;
|
|
139
|
+
return EXIT;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return CONTINUE;
|
|
143
|
+
});
|
|
144
|
+
return desc;
|
|
145
|
+
}
|
|
146
|
+
|
|
121
147
|
/**
|
|
122
148
|
* Extracts the metadata and stores it in the content meta
|
|
123
149
|
* @type PipelineStep
|
|
@@ -126,13 +152,13 @@ function optimizeMetaImage(pagePath, imgUrl) {
|
|
|
126
152
|
*/
|
|
127
153
|
export default function extractMetaData(state, req) {
|
|
128
154
|
const { content } = state;
|
|
129
|
-
const { meta,
|
|
155
|
+
const { meta, hast } = content;
|
|
130
156
|
|
|
131
157
|
// extract global metadata from spreadsheet, and overlay
|
|
132
158
|
// with local metadata from document
|
|
133
159
|
const metaConfig = Object.assign(
|
|
134
160
|
filterGlobalMetadata(state.metadata, state.info.path),
|
|
135
|
-
getLocalMetadata(
|
|
161
|
+
getLocalMetadata(hast),
|
|
136
162
|
);
|
|
137
163
|
|
|
138
164
|
// first process supported metadata properties
|
|
@@ -173,25 +199,16 @@ export default function extractMetaData(state, req) {
|
|
|
173
199
|
if (!meta.title) {
|
|
174
200
|
// content.title is not correct if the h1 is in a page-block since the pipeline
|
|
175
201
|
// only respects the heading nodes in the mdast
|
|
176
|
-
const $title =
|
|
202
|
+
const $title = select('div h1', hast);
|
|
177
203
|
if ($title) {
|
|
178
|
-
content.title = $title
|
|
204
|
+
content.title = toString($title);
|
|
179
205
|
}
|
|
180
206
|
meta.title = content.title;
|
|
181
207
|
}
|
|
182
208
|
if (!meta.description) {
|
|
183
|
-
|
|
184
|
-
let desc = [];
|
|
185
|
-
document.querySelectorAll('div > p').forEach((p) => {
|
|
186
|
-
if (desc.length === 0) {
|
|
187
|
-
const words = p.textContent.trim().split(/\s+/);
|
|
188
|
-
if (words.length >= 10 || words.some((w) => w.length > 25 && !w.startsWith('http'))) {
|
|
189
|
-
desc = desc.concat(words);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
});
|
|
193
|
-
meta.description = `${desc.slice(0, 25).join(' ')}${desc.length > 25 ? ' ...' : ''}`;
|
|
209
|
+
meta.description = extractDescription(hast);
|
|
194
210
|
}
|
|
211
|
+
|
|
195
212
|
// use the req.url and not the state.info.path in case of folder mapping
|
|
196
213
|
meta.url = makeCanonicalHtmlUrl(getAbsoluteUrl(req.headers, req.url.pathname));
|
|
197
214
|
if (!meta.canonical) {
|
|
@@ -200,11 +217,11 @@ export default function extractMetaData(state, req) {
|
|
|
200
217
|
|
|
201
218
|
// content.image is not correct if the first image is in a page-block. since the pipeline
|
|
202
219
|
// only respects the image nodes in the mdast
|
|
203
|
-
const $hero =
|
|
220
|
+
const $hero = select('div img', hast);
|
|
204
221
|
if ($hero) {
|
|
205
|
-
content.image = $hero.src;
|
|
206
|
-
if ($hero.alt) {
|
|
207
|
-
content.imageAlt = $hero.alt;
|
|
222
|
+
content.image = $hero.properties.src;
|
|
223
|
+
if ($hero.properties.alt) {
|
|
224
|
+
content.imageAlt = $hero.properties.alt;
|
|
208
225
|
}
|
|
209
226
|
}
|
|
210
227
|
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { selectAll } from 'hast-util-select';
|
|
13
|
+
import { h } from 'hastscript';
|
|
12
14
|
import { wrapContent } from './utils.js';
|
|
13
15
|
|
|
14
16
|
/**
|
|
@@ -17,20 +19,17 @@ import { wrapContent } from './utils.js';
|
|
|
17
19
|
* @param {PipelineContent} content
|
|
18
20
|
*/
|
|
19
21
|
export default async function fixSections({ content }) {
|
|
20
|
-
const {
|
|
21
|
-
const $sections =
|
|
22
|
+
const { hast } = content;
|
|
23
|
+
const $sections = selectAll('div', hast);
|
|
22
24
|
|
|
23
25
|
// if there are no sections wrap everything in a div with appropriate class names from meta
|
|
24
26
|
if ($sections.length === 0) {
|
|
25
|
-
const $outerDiv =
|
|
27
|
+
const $outerDiv = h('div');
|
|
26
28
|
if (content.meta && content.meta.class) {
|
|
27
|
-
content.meta.class.split(/[ ,]/)
|
|
29
|
+
$outerDiv.properties.className = content.meta.class.split(/[ ,]/)
|
|
28
30
|
.map((c) => c.trim())
|
|
29
|
-
.filter((c) => !!c)
|
|
30
|
-
.forEach((c) => {
|
|
31
|
-
$outerDiv.classList.add(c);
|
|
32
|
-
});
|
|
31
|
+
.filter((c) => !!c);
|
|
33
32
|
}
|
|
34
|
-
wrapContent($outerDiv,
|
|
33
|
+
wrapContent($outerDiv, hast);
|
|
35
34
|
}
|
|
36
35
|
}
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { select, selectAll } from 'unist-util-select';
|
|
13
13
|
import { toString as plain } from 'mdast-util-to-string';
|
|
14
|
+
import { rewriteBlobLink } from './utils.js';
|
|
14
15
|
|
|
15
16
|
function yaml(section) {
|
|
16
17
|
section.meta = selectAll('yaml', section)
|
|
@@ -39,13 +40,13 @@ function image(section) {
|
|
|
39
40
|
// TODO: get a better measure of prominence than "first"
|
|
40
41
|
const img = select('image', section);
|
|
41
42
|
if (img) {
|
|
42
|
-
section.image = img.url;
|
|
43
|
+
section.image = rewriteBlobLink(img.url);
|
|
43
44
|
}
|
|
44
45
|
}
|
|
45
46
|
|
|
46
47
|
/**
|
|
47
|
-
* Construct the strings corresponding to the number of
|
|
48
|
-
* @param {Object} typecounter Type as a key, number of
|
|
48
|
+
* Construct the strings corresponding to the number of occurrences per type.
|
|
49
|
+
* @param {Object} typecounter Type as a key, number of occurrences as value
|
|
49
50
|
*/
|
|
50
51
|
function constructTypes(typecounter) {
|
|
51
52
|
const types = Object.keys(typecounter).map((type) => `has-${type}`); // has-{type}
|
|
@@ -71,7 +72,7 @@ function constructTypes(typecounter) {
|
|
|
71
72
|
* 1. has-<type> for every type of content found in the section
|
|
72
73
|
* 2. is-<type>-only for sections that have only content of type
|
|
73
74
|
* 3. is-<type1>-<type2>-<type3> ranks the top three most common types of content
|
|
74
|
-
* 4. nb-<type>-<
|
|
75
|
+
* 4. nb-<type>-<nb_occurrences> is the number of occurrences per type
|
|
75
76
|
* @param {*} section
|
|
76
77
|
*/
|
|
77
78
|
function sectiontype(section) {
|