@docusaurus/utils 2.0.0-beta.17 → 2.0.0-beta.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/constants.d.ts +49 -1
- package/lib/constants.d.ts.map +1 -1
- package/lib/constants.js +53 -8
- package/lib/constants.js.map +1 -1
- package/lib/dataFileUtils.d.ts +38 -2
- package/lib/dataFileUtils.d.ts.map +1 -1
- package/lib/dataFileUtils.js +34 -8
- package/lib/dataFileUtils.js.map +1 -1
- package/lib/emitUtils.d.ts +12 -0
- package/lib/emitUtils.d.ts.map +1 -1
- package/lib/emitUtils.js +24 -7
- package/lib/emitUtils.js.map +1 -1
- package/lib/gitUtils.d.ts +54 -5
- package/lib/gitUtils.d.ts.map +1 -1
- package/lib/gitUtils.js +14 -11
- package/lib/gitUtils.js.map +1 -1
- package/lib/globUtils.d.ts +27 -0
- package/lib/globUtils.d.ts.map +1 -1
- package/lib/globUtils.js +28 -10
- package/lib/globUtils.js.map +1 -1
- package/lib/hashUtils.d.ts +5 -4
- package/lib/hashUtils.d.ts.map +1 -1
- package/lib/hashUtils.js +6 -5
- package/lib/hashUtils.js.map +1 -1
- package/lib/i18nUtils.d.ts +11 -0
- package/lib/i18nUtils.d.ts.map +1 -1
- package/lib/i18nUtils.js +12 -3
- package/lib/i18nUtils.js.map +1 -1
- package/lib/index.d.ts +7 -7
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +16 -11
- package/lib/index.js.map +1 -1
- package/lib/jsUtils.d.ts +32 -4
- package/lib/jsUtils.d.ts.map +1 -1
- package/lib/jsUtils.js +35 -13
- package/lib/jsUtils.js.map +1 -1
- package/lib/markdownLinks.d.ts +48 -5
- package/lib/markdownLinks.d.ts.map +1 -1
- package/lib/markdownLinks.js +21 -11
- package/lib/markdownLinks.js.map +1 -1
- package/lib/markdownUtils.d.ts +112 -0
- package/lib/markdownUtils.d.ts.map +1 -0
- package/lib/markdownUtils.js +271 -0
- package/lib/markdownUtils.js.map +1 -0
- package/lib/pathUtils.d.ts +1 -1
- package/lib/pathUtils.d.ts.map +1 -1
- package/lib/pathUtils.js +2 -2
- package/lib/pathUtils.js.map +1 -1
- package/lib/slugger.d.ts +10 -0
- package/lib/slugger.d.ts.map +1 -1
- package/lib/slugger.js +4 -0
- package/lib/slugger.js.map +1 -1
- package/lib/tags.d.ts +31 -10
- package/lib/tags.d.ts.map +1 -1
- package/lib/tags.js +38 -23
- package/lib/tags.js.map +1 -1
- package/lib/urlUtils.d.ts +48 -2
- package/lib/urlUtils.d.ts.map +1 -1
- package/lib/urlUtils.js +81 -9
- package/lib/urlUtils.js.map +1 -1
- package/lib/webpackUtils.d.ts +5 -0
- package/lib/webpackUtils.d.ts.map +1 -1
- package/lib/webpackUtils.js +6 -2
- package/lib/webpackUtils.js.map +1 -1
- package/package.json +9 -9
- package/src/constants.ts +61 -9
- package/src/dataFileUtils.ts +43 -11
- package/src/emitUtils.ts +26 -9
- package/src/gitUtils.ts +76 -16
- package/src/globUtils.ts +29 -13
- package/src/hashUtils.ts +6 -5
- package/src/i18nUtils.ts +13 -4
- package/src/index.ts +14 -8
- package/src/jsUtils.ts +34 -20
- package/src/markdownLinks.ts +64 -27
- package/src/markdownUtils.ts +354 -0
- package/src/pathUtils.ts +2 -2
- package/src/slugger.ts +13 -1
- package/src/tags.ts +39 -27
- package/src/urlUtils.ts +96 -10
- package/src/webpackUtils.ts +10 -2
- package/lib/markdownParser.d.ts +0 -32
- package/lib/markdownParser.d.ts.map +0 -1
- package/lib/markdownParser.js +0 -160
- package/lib/markdownParser.js.map +0 -1
- package/src/markdownParser.ts +0 -201
package/src/markdownLinks.ts
CHANGED
|
@@ -6,41 +6,79 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import path from 'path';
|
|
9
|
+
import {getContentPathList} from './dataFileUtils';
|
|
9
10
|
import {aliasedSitePath} from './pathUtils';
|
|
10
11
|
|
|
12
|
+
/**
|
|
13
|
+
* Content plugins have a base path and a localized path to source content from.
|
|
14
|
+
* We will look into the localized path in priority.
|
|
15
|
+
*/
|
|
11
16
|
export type ContentPaths = {
|
|
17
|
+
/**
|
|
18
|
+
* The absolute path to the base content directory, like `"<siteDir>/docs"`.
|
|
19
|
+
*/
|
|
12
20
|
contentPath: string;
|
|
21
|
+
/**
|
|
22
|
+
* The absolute path to the localized content directory, like
|
|
23
|
+
* `"<siteDir>/i18n/zh-Hans/plugin-content-docs"`.
|
|
24
|
+
*/
|
|
13
25
|
contentPathLocalized: string;
|
|
14
26
|
};
|
|
15
27
|
|
|
28
|
+
/** Data structure representing each broken Markdown link to be reported. */
|
|
16
29
|
export type BrokenMarkdownLink<T extends ContentPaths> = {
|
|
30
|
+
/** Absolute path to the file containing this link. */
|
|
17
31
|
filePath: string;
|
|
32
|
+
/**
|
|
33
|
+
* This is generic because it may contain extra metadata like version name,
|
|
34
|
+
* which the reporter can provide for context.
|
|
35
|
+
*/
|
|
18
36
|
contentPaths: T;
|
|
37
|
+
/**
|
|
38
|
+
* The content of the link, like `"./brokenFile.md"`
|
|
39
|
+
*/
|
|
19
40
|
link: string;
|
|
20
41
|
};
|
|
21
42
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
};
|
|
34
|
-
|
|
43
|
+
/**
|
|
44
|
+
* Takes a Markdown file and replaces relative file references with their URL
|
|
45
|
+
* counterparts, e.g. `[link](./intro.md)` => `[link](/docs/intro)`, preserving
|
|
46
|
+
* everything else.
|
|
47
|
+
*
|
|
48
|
+
* This method uses best effort to find a matching file. The file reference can
|
|
49
|
+
* be relative to the directory of the current file (most likely) or any of the
|
|
50
|
+
* content paths (so `/tutorials/intro.md` can be resolved as
|
|
51
|
+
* `<siteDir>/docs/tutorials/intro.md`). Links that contain the `http(s):` or
|
|
52
|
+
* `@site/` prefix will always be ignored.
|
|
53
|
+
*/
|
|
35
54
|
export function replaceMarkdownLinks<T extends ContentPaths>({
|
|
36
55
|
siteDir,
|
|
37
56
|
fileString,
|
|
38
57
|
filePath,
|
|
39
58
|
contentPaths,
|
|
40
59
|
sourceToPermalink,
|
|
41
|
-
}:
|
|
42
|
-
|
|
43
|
-
|
|
60
|
+
}: {
|
|
61
|
+
/** Absolute path to the site directory, used to resolve aliased paths. */
|
|
62
|
+
siteDir: string;
|
|
63
|
+
/** The Markdown file content to be processed. */
|
|
64
|
+
fileString: string;
|
|
65
|
+
/** Absolute path to the current file containing `fileString`. */
|
|
66
|
+
filePath: string;
|
|
67
|
+
/** The content paths which the file reference may live in. */
|
|
68
|
+
contentPaths: T;
|
|
69
|
+
/**
|
|
70
|
+
* A map from source paths to their URLs. Source paths are `@site` aliased.
|
|
71
|
+
*/
|
|
72
|
+
sourceToPermalink: {[aliasedPath: string]: string};
|
|
73
|
+
}): {
|
|
74
|
+
/**
|
|
75
|
+
* The content with all Markdown file references replaced with their URLs.
|
|
76
|
+
* Unresolved links are left as-is.
|
|
77
|
+
*/
|
|
78
|
+
newContent: string;
|
|
79
|
+
/** The list of broken links. */
|
|
80
|
+
brokenMarkdownLinks: BrokenMarkdownLink<T>[];
|
|
81
|
+
} {
|
|
44
82
|
const brokenMarkdownLinks: BrokenMarkdownLink<T>[] = [];
|
|
45
83
|
|
|
46
84
|
// Replace internal markdown linking (except in fenced blocks).
|
|
@@ -48,12 +86,13 @@ export function replaceMarkdownLinks<T extends ContentPaths>({
|
|
|
48
86
|
let lastCodeFence = '';
|
|
49
87
|
const lines = fileString.split('\n').map((line) => {
|
|
50
88
|
if (line.trim().startsWith('```')) {
|
|
89
|
+
const codeFence = line.trim().match(/^`+/)![0]!;
|
|
51
90
|
if (!fencedBlock) {
|
|
52
91
|
fencedBlock = true;
|
|
53
|
-
|
|
92
|
+
lastCodeFence = codeFence;
|
|
54
93
|
// If we are in a ````-fenced block, all ``` would be plain text instead
|
|
55
94
|
// of fences
|
|
56
|
-
} else if (
|
|
95
|
+
} else if (codeFence.length >= lastCodeFence.length) {
|
|
57
96
|
fencedBlock = false;
|
|
58
97
|
}
|
|
59
98
|
}
|
|
@@ -63,21 +102,19 @@ export function replaceMarkdownLinks<T extends ContentPaths>({
|
|
|
63
102
|
|
|
64
103
|
let modifiedLine = line;
|
|
65
104
|
// Replace inline-style links or reference-style links e.g:
|
|
66
|
-
// This is [Document 1](doc1.md)
|
|
67
|
-
//
|
|
68
|
-
// [doc1]: doc1.md -> we replace this doc1.md with correct link
|
|
105
|
+
// This is [Document 1](doc1.md)
|
|
106
|
+
// [doc1]: doc1.md
|
|
69
107
|
const mdRegex =
|
|
70
|
-
/(
|
|
108
|
+
/(?:\]\(|\]:\s*)(?!https?:\/\/|@site\/)(?<filename>[^'")\]\s>]+\.mdx?)/g;
|
|
71
109
|
let mdMatch = mdRegex.exec(modifiedLine);
|
|
72
110
|
while (mdMatch !== null) {
|
|
73
111
|
// Replace it to correct html link.
|
|
74
|
-
const mdLink = mdMatch.groups!.filename
|
|
112
|
+
const mdLink = mdMatch.groups!.filename!;
|
|
75
113
|
|
|
76
114
|
const sourcesToTry = [
|
|
77
|
-
path.
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
];
|
|
115
|
+
path.dirname(filePath),
|
|
116
|
+
...getContentPathList(contentPaths),
|
|
117
|
+
].map((p) => path.join(p, decodeURIComponent(mdLink)));
|
|
81
118
|
|
|
82
119
|
const aliasedSourceMatch = sourcesToTry
|
|
83
120
|
.map((source) => aliasedSitePath(source, siteDir))
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import logger from '@docusaurus/logger';
|
|
9
|
+
import matter from 'gray-matter';
|
|
10
|
+
import {createSlugger, type Slugger, type SluggerOptions} from './slugger';
|
|
11
|
+
|
|
12
|
+
// Some utilities for parsing Markdown content. These things are only used on
|
|
13
|
+
// server-side when we infer metadata like `title` and `description` from the
|
|
14
|
+
// content. Most parsing is still done in MDX through the mdx-loader.
|
|
15
|
+
|
|
16
|
+
/**
 * Parses a custom ID from a heading. The ID may only contain word characters
 * (letters, digits, underscores) and dashes.
 *
 * @param heading e.g. `## Some heading {#some-heading}` where the last
 * character must be `}` for the ID to be recognized
 * @returns The heading text without the trailing `{#id}` marker (and without
 * the whitespace preceding it), plus the ID when one was found. When no ID is
 * found, the heading is returned untouched and `id` is `undefined`.
 */
export function parseMarkdownHeadingId(heading: string): {
  /**
   * The heading content sans the ID part, right-trimmed. e.g. `## Some heading`
   */
  text: string;
  /** The heading ID. e.g. `some-heading` */
  id?: string;
} {
  const customHeadingIdRegex = /\s*\{#(?<id>[\w-]+)\}$/;
  const matches = customHeadingIdRegex.exec(heading);
  if (matches) {
    return {
      // The regex is anchored at the end of the string, so everything before
      // the match is the heading text. Slicing at the match index (rather than
      // a string `replace`, which removes the first textual occurrence) makes
      // sure we cut the trailing marker even if the same text appears earlier.
      text: heading.slice(0, matches.index),
      id: matches.groups!.id!,
    };
  }
  return {text: heading, id: undefined};
}
|
|
41
|
+
|
|
42
|
+
// TODO: Find a better way to do so, possibly by compiling the Markdown content,
|
|
43
|
+
// stripping out HTML tags and obtaining the first line.
|
|
44
|
+
/**
 * Creates an excerpt of a Markdown file. This function will:
 *
 * - Ignore h1 headings (setext or atx)
 * - Ignore import/export
 * - Ignore code blocks
 *
 * And for the first contentful line, it will strip away most Markdown
 * syntax, including HTML tags, emphasis, links (keeping the text), etc.
 *
 * Both LF and CRLF line endings are accepted.
 *
 * @param fileString The Markdown content to create the excerpt from.
 * @returns The first line that is non-empty after cleaning, or `undefined` if
 * no such line exists.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString
    .trimStart()
    // Remove Markdown alternate title
    .replace(/^[^\r\n]*\r?\n[=]+/g, '')
    .split(/\r?\n/);
  let inCode = false;
  let inImport = false;
  let lastCodeFence = '';

  for (const fileLine of fileLines) {
    const trimmedLine = fileLine.trim();

    // A blank line ends an import/export block. Whitespace-only lines count as
    // blank too; otherwise a line containing only spaces after the imports
    // would keep us in "import" mode and swallow the rest of the document.
    if (!trimmedLine) {
      inImport = false;
      continue;
    }

    // Skip import/export declaration (possibly spanning multiple lines).
    if ((/^(?:import|export)\s.*/.test(fileLine) || inImport) && !inCode) {
      inImport = true;
      continue;
    }

    // Skip code block line.
    if (trimmedLine.startsWith('```')) {
      const codeFence = trimmedLine.match(/^`+/)![0]!;
      if (!inCode) {
        inCode = true;
        lastCodeFence = codeFence;
        // If we are in a ````-fenced block, all ``` would be plain text instead
        // of fences
      } else if (codeFence.length >= lastCodeFence.length) {
        inCode = false;
      }
      continue;
    } else if (inCode) {
      continue;
    }

    // The order of the replacements below matters: each one works on the
    // output of the previous one.
    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove Title headers
      .replace(/^#[^#]+#?/gm, '')
      // Remove Markdown + ATX-style headers
      .replace(/^#{1,6}\s*(?<text>[^#]*)\s*#{0,6}/gm, '$1')
      // Remove emphasis.
      .replace(/(?<opening>[*_]{1,3})(?<text>.*?)\1/g, '$2')
      // Remove strikethroughs.
      .replace(/~~(?<text>\S.*\S)~~/g, '$1')
      // Remove images.
      .replace(/!\[(?<alt>.*?)\][[(].*?[\])]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](?:: .*$)?/g, '')
      // Remove inline links.
      .replace(/\[(?<alt>.*?)\][[(].*?[\])]/g, '$1')
      // Remove inline code.
      .replace(/`(?<text>.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/:::.*/, '')
      // Remove emoji names within colons, including preceding whitespace.
      .replace(/\s?:(?:::|[^:\n])+:/g, '')
      // Remove custom Markdown heading id.
      .replace(/\{#*[\w-]+\}/, '')
      .trim();

    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Splits raw Markdown file content into its front matter and its body. The
 * parsing is delegated to gray-matter, which also accepts TOML and other
 * markup languages besides YAML.
 *
 * @param markdownFileContent The raw file content, front matter included.
 * @throws Throws when gray-matter throws. e.g.:
 * ```md
 * ---
 * foo: : bar
 * ---
 * ```
 */
export function parseFrontMatter(markdownFileContent: string): {
  /** Front matter as parsed by gray-matter. */
  frontMatter: {[key: string]: unknown};
  /** The remaining content, trimmed. */
  content: string;
} {
  const parsed = matter(markdownFileContent);
  const frontMatter = parsed.data;
  const content = parsed.content.trim();
  return {frontMatter, content};
}
|
|
156
|
+
|
|
157
|
+
/**
 * Converts a Markdown title to plain text. For now this only unwraps a title
 * that is entirely wrapped in backticks (`` `config.js` `` => `config.js`).
 */
function toTextContentTitle(contentTitle: string): string {
  if (contentTitle.startsWith('`') && contentTitle.endsWith('`')) {
    return contentTitle.substring(1, contentTitle.length - 1);
  }
  return contentTitle;
}

type ParseMarkdownContentTitleOptions = {
  /**
   * If `true`, the matching title will be removed from the returned content.
   * We can promise that at least one empty line will be left between the
   * content before and after, but you shouldn't make too much assumption
   * about what's left.
   */
  removeContentTitle?: boolean;
};

/**
 * Takes the raw Markdown content, without front matter, and tries to find an h1
 * title (setext or atx) to be used as metadata.
 *
 * It only searches until the first contentful paragraph, ignoring import/export
 * declarations.
 *
 * The conversion of the Markdown title to text is deliberately minimal, since
 * the result is only used as a fallback when `frontMatter.title` is not
 * provided. For now, we just unwrap inline code
 * (``# `config.js` `` => `config.js`).
 *
 * @param contentUntrimmed The Markdown content; it is trimmed before parsing.
 * @param options See `removeContentTitle`; defaults to keeping the title.
 */
export function parseMarkdownContentTitle(
  contentUntrimmed: string,
  options?: ParseMarkdownContentTitleOptions,
): {
  /** The content, optionally without the content title. */
  content: string;
  /** The title, trimmed and without the `#`. */
  contentTitle: string | undefined;
} {
  const removeContentTitleOption = options?.removeContentTitle ?? false;

  const content = contentUntrimmed.trim();
  // We only need to detect import statements that will be parsed by MDX as
  // `import` nodes, as broken syntax can't render anyways. That means any block
  // that has `import` at the very beginning and surrounded by empty lines.
  const contentWithoutImport = content
    .replace(/^(?:import\s(?:.|\n(?!\n))*\n{2,})*/, '')
    .trim();

  // Both regexes are anchored at the start of `contentWithoutImport`, so a
  // match always begins at index 0 of that string.
  const regularTitleMatch = /^#[ \t]+(?<title>[^ \t].*)(?:\n|$)/.exec(
    contentWithoutImport,
  );
  const alternateTitleMatch = /^(?<title>.*)\n=+(?:\n|$)/.exec(
    contentWithoutImport,
  );

  const titleMatch = regularTitleMatch ?? alternateTitleMatch;
  if (!titleMatch) {
    return {content, contentTitle: undefined};
  }

  // `contentWithoutImport` is a suffix of `content` (a prefix was removed and
  // `content` has no trailing whitespace), so the title starts at this offset.
  // Cutting by position, instead of replacing the first textual occurrence,
  // guarantees that we remove exactly the title we matched, even when the same
  // text also appears inside the skipped import block.
  const titleStart = content.length - contentWithoutImport.length;
  const newContent = removeContentTitleOption
    ? content.slice(0, titleStart) +
      contentWithoutImport.slice(titleMatch[0]!.length)
    : content;
  if (regularTitleMatch) {
    return {
      content: newContent.trim(),
      contentTitle: toTextContentTitle(
        regularTitleMatch
          .groups!.title!.trim()
          // Strip a trailing custom heading ID or closing `#` sequence
          .replace(/\s*(?:\{#*[\w-]+\}|#+)$/, ''),
      ).trim(),
    };
  }
  return {
    content: newContent.trim(),
    contentTitle: toTextContentTitle(
      alternateTitleMatch!.groups!.title!.trim().replace(/\s*=+$/, ''),
    ).trim(),
  };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Runs the whole parsing pipeline on a raw Markdown file: front matter
 * extraction, content title detection, then excerpt creation.
 *
 * @throws Rethrows any error raised during parsing (usually invalid front
 * matter syntax reported by `parseFrontMatter`), after logging a hint.
 */
export function parseMarkdownString(
  markdownFileContent: string,
  options?: ParseMarkdownContentTitleOptions,
): {
  /** @see {@link parseFrontMatter} */
  frontMatter: {[key: string]: unknown};
  /** @see {@link parseMarkdownContentTitle} */
  contentTitle: string | undefined;
  /** @see {@link createExcerpt} */
  excerpt: string | undefined;
  /**
   * Content without front matter and (optionally) without title, depending on
   * the `removeContentTitle` option.
   */
  content: string;
} {
  try {
    const parsedFile = parseFrontMatter(markdownFileContent);
    const parsedTitle = parseMarkdownContentTitle(parsedFile.content, options);

    return {
      frontMatter: parsedFile.frontMatter,
      content: parsedTitle.content,
      contentTitle: parsedTitle.contentTitle,
      // The excerpt is computed from the content after title handling
      excerpt: createExcerpt(parsedTitle.content),
    };
  } catch (err) {
    logger.error(`Error while parsing Markdown front matter.
This can happen if you use special characters in front matter values (try using double quotes around that value).`);
    throw err;
  }
}
|
|
281
|
+
|
|
282
|
+
/**
 * Replaces every inline Markdown link in the line with its link text, e.g.
 * `[text](url)` => `text`.
 */
function unwrapMarkdownLinks(line: string): string {
  const inlineLinkPattern = /\[(?<alt>[^\]]+)\]\([^)]+\)/g;
  // `$<alt>` inserts the captured link text verbatim
  return line.replace(inlineLinkPattern, '$<alt>');
}
|
|
285
|
+
|
|
286
|
+
/**
 * Appends a generated `{#slug}` marker to a heading line. The slug is computed
 * by the given slugger from the heading text, with inline links unwrapped.
 *
 * @param line The heading line, starting with its `#` characters.
 * @param slugger The slugger used to generate the ID.
 * @param maintainCase Forwarded to the slugger as the `maintainCase` option.
 */
function addHeadingId(
  line: string,
  slugger: Slugger,
  maintainCase: boolean,
): string {
  // The run of leading `#` characters (this pattern always matches, possibly
  // with an empty string).
  const hashes = /^#*/.exec(line)![0]!;
  const text = line.slice(hashes.length).trimEnd();
  const slugSource = unwrapMarkdownLinks(text).trim();
  const slug = slugger.slug(slugSource, {maintainCase});

  return `${hashes}${text} {#${slug}}`;
}
|
|
304
|
+
|
|
305
|
+
export type WriteHeadingIDOptions = SluggerOptions & {
  /** Overwrite existing heading IDs. */
  overwrite?: boolean;
};

/**
 * Takes Markdown content, returns new content with heading IDs written.
 * Respects existing IDs (unless `overwrite=true`) and never generates colliding
 * IDs (through the slugger).
 *
 * Only lines starting with `##` (h2 and deeper) outside of fenced code blocks
 * are processed.
 *
 * @param content The Markdown content, with lines separated by `\n`.
 * @param options `maintainCase` and `overwrite` both default to `false`.
 */
export function writeMarkdownHeadingId(
  content: string,
  options: WriteHeadingIDOptions = {maintainCase: false, overwrite: false},
): string {
  const {maintainCase = false, overwrite = false} = options;
  const lines = content.split('\n');
  const slugger = createSlugger();

  // If we can't overwrite existing slugs, make sure other headings don't
  // generate colliding slugs by first marking these slugs as occupied
  if (!overwrite) {
    lines.forEach((line) => {
      const parsedHeading = parseMarkdownHeadingId(line);
      if (parsedHeading.id) {
        slugger.slug(parsedHeading.id);
      }
    });
  }

  let inCode = false;
  let lastCodeFence = '';

  return lines
    .map((line) => {
      const trimmedLine = line.trim();
      if (trimmedLine.startsWith('```')) {
        const codeFence = trimmedLine.match(/^`+/)![0]!;
        if (!inCode) {
          inCode = true;
          lastCodeFence = codeFence;
          // If we are in a ````-fenced block, all ``` would be plain text
          // instead of fences, so only a fence at least as long as the opening
          // one closes the block
        } else if (codeFence.length >= lastCodeFence.length) {
          inCode = false;
        }
        return line;
      }
      // Ignore h1 headings, as we don't create anchor links for those
      if (inCode || !line.startsWith('##')) {
        return line;
      }
      const parsedHeading = parseMarkdownHeadingId(line);

      // Do not process if id is already there
      if (parsedHeading.id && !overwrite) {
        return line;
      }
      return addHeadingId(parsedHeading.text, slugger, maintainCase);
    })
    .join('\n');
}
|
package/src/pathUtils.ts
CHANGED
|
@@ -24,7 +24,7 @@ export const isNameTooLong = (str: string): boolean =>
|
|
|
24
24
|
? str.length + SPACE_FOR_APPENDING > MAX_PATH_SEGMENT_CHARS // MacOS (APFS) and Windows (NTFS) filename length limit (255 chars)
|
|
25
25
|
: Buffer.from(str).length + SPACE_FOR_APPENDING > MAX_PATH_SEGMENT_BYTES; // Other (255 bytes)
|
|
26
26
|
|
|
27
|
-
export
|
|
27
|
+
export function shortName(str: string): string {
|
|
28
28
|
if (isMacOs() || isWindows()) {
|
|
29
29
|
const overflowingChars = str.length - MAX_PATH_SEGMENT_CHARS;
|
|
30
30
|
return str.slice(
|
|
@@ -41,7 +41,7 @@ export const shortName = (str: string): string => {
|
|
|
41
41
|
Buffer.byteLength(strBuffer) - overflowingBytes - SPACE_FOR_APPENDING - 1,
|
|
42
42
|
)
|
|
43
43
|
.toString();
|
|
44
|
-
}
|
|
44
|
+
}
|
|
45
45
|
|
|
46
46
|
/**
|
|
47
47
|
* Convert Windows backslash paths to posix style paths.
|
package/src/slugger.ts
CHANGED
|
@@ -10,12 +10,24 @@ import GithubSlugger from 'github-slugger';
|
|
|
10
10
|
// We create our own abstraction on top of the lib:
|
|
11
11
|
// - unify usage everywhere in the codebase
|
|
12
12
|
// - ability to add extra options
|
|
13
|
-
export type SluggerOptions = {
|
|
13
|
+
export type SluggerOptions = {
|
|
14
|
+
/** Keep the headings' casing, otherwise make all lowercase. */
|
|
15
|
+
maintainCase?: boolean;
|
|
16
|
+
};
|
|
14
17
|
|
|
15
18
|
export type Slugger = {
|
|
19
|
+
/**
|
|
20
|
+
* Takes a Markdown heading like "Josh Cena" and sluggifies it according to
|
|
21
|
+
* GitHub semantics (in this case `josh-cena`). Stateful, because if you try
|
|
22
|
+
* to sluggify "Josh Cena" again it would return `josh-cena-1`.
|
|
23
|
+
*/
|
|
16
24
|
slug: (value: string, options?: SluggerOptions) => string;
|
|
17
25
|
};
|
|
18
26
|
|
|
27
|
+
/**
|
|
28
|
+
* A thin wrapper around github-slugger. This is a factory function that returns
|
|
29
|
+
* a stateful Slugger object.
|
|
30
|
+
*/
|
|
19
31
|
export function createSlugger(): Slugger {
|
|
20
32
|
const githubSlugger = new GithubSlugger();
|
|
21
33
|
return {
|
package/src/tags.ts
CHANGED
|
@@ -10,12 +10,13 @@ import {normalizeUrl} from './urlUtils';
|
|
|
10
10
|
|
|
11
11
|
export type Tag = {
|
|
12
12
|
label: string;
|
|
13
|
+
/** Permalink to this tag's page, without the `/tags/` base path. */
|
|
13
14
|
permalink: string;
|
|
14
15
|
};
|
|
15
16
|
|
|
16
17
|
export type FrontMatterTag = string | Tag;
|
|
17
18
|
|
|
18
|
-
|
|
19
|
+
function normalizeFrontMatterTag(
|
|
19
20
|
tagsPath: string,
|
|
20
21
|
frontMatterTag: FrontMatterTag,
|
|
21
22
|
): Tag {
|
|
@@ -45,8 +46,19 @@ export function normalizeFrontMatterTag(
|
|
|
45
46
|
};
|
|
46
47
|
}
|
|
47
48
|
|
|
49
|
+
/**
|
|
50
|
+
* Takes tag objects as they are defined in front matter, and normalizes each
|
|
51
|
+
* into a standard tag object. The permalink is created by appending the
|
|
52
|
+
* sluggified label to `tagsPath`. Front matter tags already containing
|
|
53
|
+
* permalinks would still have `tagsPath` prepended.
|
|
54
|
+
*
|
|
55
|
+
* The result will always be unique by permalinks. The behavior with colliding
|
|
56
|
+
* permalinks is undetermined.
|
|
57
|
+
*/
|
|
48
58
|
export function normalizeFrontMatterTags(
|
|
59
|
+
/** Base path to append the tag permalinks to. */
|
|
49
60
|
tagsPath: string,
|
|
61
|
+
/** Can be `undefined`, so that we can directly pipe in `frontMatter.tags`. */
|
|
50
62
|
frontMatterTags: FrontMatterTag[] | undefined = [],
|
|
51
63
|
): Tag[] {
|
|
52
64
|
const tags = frontMatterTags.map((tag) =>
|
|
@@ -56,42 +68,42 @@ export function normalizeFrontMatterTags(
|
|
|
56
68
|
return _.uniqBy(tags, (tag) => tag.permalink);
|
|
57
69
|
}
|
|
58
70
|
|
|
59
|
-
|
|
71
|
+
type TaggedItemGroup<Item> = {
|
|
60
72
|
tag: Tag;
|
|
61
73
|
items: Item[];
|
|
62
74
|
};
|
|
63
75
|
|
|
64
76
|
/**
|
|
65
|
-
* Permits to group docs/
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
*
|
|
77
|
+
* Groups docs/blog posts by tag (provided by front matter).
|
|
78
|
+
*
|
|
79
|
+
* @returns a map from tag permalink to the items and other relevant tag data.
|
|
80
|
+
* The record is indexed by permalink, because routes must be unique in the end.
|
|
81
|
+
* Labels may vary on 2 MD files but they are normalized. Docs with
|
|
82
|
+
* label='some label' and label='some-label' should end up in the same page.
|
|
71
83
|
*/
|
|
72
84
|
export function groupTaggedItems<Item>(
|
|
73
|
-
items: Item[],
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// the same tag but the permalink is different for each
|
|
82
|
-
// For now, the first tag found wins
|
|
83
|
-
result[tag.permalink] = result[tag.permalink] ?? {
|
|
84
|
-
tag,
|
|
85
|
-
items: [],
|
|
86
|
-
};
|
|
87
|
-
|
|
88
|
-
// Add item to group
|
|
89
|
-
result[tag.permalink].items.push(item);
|
|
90
|
-
}
|
|
85
|
+
items: readonly Item[],
|
|
86
|
+
/**
|
|
87
|
+
* A callback telling me how to get the tags list of the current item. Usually
|
|
88
|
+
* simply getting it from some metadata of the current item.
|
|
89
|
+
*/
|
|
90
|
+
getItemTags: (item: Item) => readonly Tag[],
|
|
91
|
+
): {[permalink: string]: TaggedItemGroup<Item>} {
|
|
92
|
+
const result: {[permalink: string]: TaggedItemGroup<Item>} = {};
|
|
91
93
|
|
|
92
94
|
items.forEach((item) => {
|
|
93
95
|
getItemTags(item).forEach((tag) => {
|
|
94
|
-
|
|
96
|
+
// Init missing tag groups
|
|
97
|
+
// TODO: it's not really clear what should be the behavior if 2 tags have
|
|
98
|
+
// the same permalink but the label is different for each
|
|
99
|
+
// For now, the first tag found wins
|
|
100
|
+
result[tag.permalink] ??= {
|
|
101
|
+
tag,
|
|
102
|
+
items: [],
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
// Add item to group
|
|
106
|
+
result[tag.permalink]!.items.push(item);
|
|
95
107
|
});
|
|
96
108
|
});
|
|
97
109
|
|