shelving 1.71.0 → 1.71.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/markup/index.d.ts +1 -0
- package/markup/index.js +1 -0
- package/markup/regexp.d.ts +39 -0
- package/markup/regexp.js +50 -0
- package/markup/rules.d.ts +30 -54
- package/markup/rules.js +38 -107
- package/package.json +1 -1
- package/util/debug.js +4 -4
- package/util/regexp.d.ts +6 -23
- package/util/regexp.js +6 -24
- package/util/string.d.ts +2 -1
- package/util/string.js +18 -28
package/markup/index.d.ts
CHANGED
package/markup/index.js
CHANGED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Data } from "../util/data.js";
|
|
2
|
+
import { PossibleRegExp } from "../util/regexp.js";
|
|
3
|
+
import type { MarkupOptions } from "./options.js";
|
|
4
|
+
/** Subset of `NamedRegExpArray<T>` containing the only things we're required to return from a `MarkupMatcher` function. */
|
|
5
|
+
export declare type MarkupMatch<T extends Data | undefined = Data | undefined> = {
|
|
6
|
+
0: string;
|
|
7
|
+
index: number;
|
|
8
|
+
groups: T;
|
|
9
|
+
};
|
|
10
|
+
/** Function that matches a string and returns a `MarkupMatch` or `null` or `void` */
|
|
11
|
+
export declare type MarkupMatcher<T extends Data | undefined = Data | undefined> = (input: string, options: MarkupOptions) => MarkupMatch<T> | null | void;
|
|
12
|
+
export declare const LINE_REGEXP: RegExp;
|
|
13
|
+
export declare const LINE_START_REGEXP: RegExp;
|
|
14
|
+
export declare const LINE_END_REGEXP: RegExp;
|
|
15
|
+
export declare const BLOCK_REGEXP: RegExp;
|
|
16
|
+
export declare const BLOCK_START_REGEXP: RegExp;
|
|
17
|
+
export declare const BLOCK_END_REGEXP: RegExp;
|
|
18
|
+
/** Create regular expression that matches a block of content. */
|
|
19
|
+
export declare function getBlockRegExp(content?: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp): RegExp;
|
|
20
|
+
/** Create regular expression that matches a line of content. */
|
|
21
|
+
export declare function getLineRegExp(content?: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp): RegExp;
|
|
22
|
+
/**
|
|
23
|
+
* Regular expression that only matches its pattern if it's a complete word.
|
|
24
|
+
* - Won't match if there are letters or numbers directly before/after the matched content.
|
|
25
|
+
* - Will match if there is punctuation before/after the matched content or it is at the start/end of the string.
|
|
26
|
+
* - e.g. `this` and `"this"` and `that this that` and `that (this) that` will match because `this` is a complete word.
|
|
27
|
+
* - e.g. `thatthis` and `thatthisthat` will not because `this` is only part of a complete word.
|
|
28
|
+
*
|
|
29
|
+
* @note This isn't guaranteed to work with `String.prototype.match()` and `String.prototype.replace()`
|
|
30
|
+
*
|
|
31
|
+
* @todo This can be much less complicated when Safari supports lookbehinds in regular expressions.
|
|
32
|
+
* - We use a negative lookahead for the end of the word and it works great.
|
|
33
|
+
* - If we could use a negative lookbehind for the start of the word we wouldn't need to create a function that offsets the start.
|
|
34
|
+
*/
|
|
35
|
+
export declare class WordRegExp extends RegExp {
|
|
36
|
+
constructor(pattern: string);
|
|
37
|
+
exec(input: string): RegExpExecArray | null;
|
|
38
|
+
test(input: string): boolean;
|
|
39
|
+
}
|
package/markup/regexp.js
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { getRegExpSource } from "../util/regexp.js";
|
|
2
|
+
// Regular expressions.
|
|
3
|
+
export const LINE_REGEXP = /[^\n]*/; // Match a line of content (zero or more characters that are not a newline).
|
|
4
|
+
export const LINE_START_REGEXP = /^\n*|\n+/; // Start of a line: start of string (plus any leading linebreaks), or one or more linebreaks.
|
|
5
|
+
export const LINE_END_REGEXP = /\n+|$/; // End of a line: one or more linebreaks, or end of string.
|
|
6
|
+
export const BLOCK_REGEXP = /[\s\S]*?/; // Match a block of content (any characters including newlines; lazy, so it takes as little as possible).
|
|
7
|
+
export const BLOCK_START_REGEXP = /^\n*|\n+/; // Start of a block: start of string (plus any leading linebreaks), or one or more linebreaks.
|
|
8
|
+
export const BLOCK_END_REGEXP = /\n*$|\n\n+/; // End of a block: end of string (allowing trailing linebreaks), or two or more linebreaks.
|
|
9
|
+
/**
 * Build a regular expression that matches a block of content.
 * - The `start`, `content` and `end` patterns are each wrapped in a non-capturing group and joined in that order.
 * - Each argument is passed through `getRegExpSource()` before being embedded.
 */
export function getBlockRegExp(content = BLOCK_REGEXP, end = BLOCK_END_REGEXP, start = BLOCK_START_REGEXP) {
	const groups = [start, content, end].map(part => `(?:${getRegExpSource(part)})`);
	return new RegExp(groups.join(""));
}
|
|
13
|
+
/**
 * Build a regular expression that matches a line of content.
 * - The `start`, `content` and `end` patterns are each wrapped in a non-capturing group and joined in that order.
 * - Each argument is passed through `getRegExpSource()` before being embedded.
 */
export function getLineRegExp(content = LINE_REGEXP, end = LINE_END_REGEXP, start = LINE_START_REGEXP) {
	const groups = [start, content, end].map(part => `(?:${getRegExpSource(part)})`);
	return new RegExp(groups.join(""));
}
|
|
17
|
+
/**
 * Regular expression that only matches its pattern when the match is a complete word.
 * - Won't match if there are letters or numbers directly before/after the matched content.
 * - Will match if there is punctuation before/after the matched content or it is at the start/end of the string.
 * - e.g. `this` and `"this"` and `that this that` and `that (this) that` will match because `this` is a complete word.
 * - e.g. `thatthis` and `thatthisthat` will not because `this` is only part of a complete word.
 *
 * The expression is compiled with the `u` (unicode) flag because `\p{L}` and `\p{N}` are only Unicode property escapes in unicode mode.
 * - Without the flag they degrade to the literal characters `p`, `{`, `L`, `N`, `}` and the word boundaries silently stop working.
 * - This means `pattern` must be valid in unicode mode (e.g. no unnecessary identity escapes such as `\-` outside a character class).
 *
 * @param pattern Source of the pattern to match as a complete word. It is embedded as-is between the boundary checks, so it may define its own capture groups but must not define a group named `lookbehind`.
 *
 * @note This isn't guaranteed to work with `String.prototype.match()` and `String.prototype.replace()`
 *
 * @todo This can be much less complicated when Safari supports lookbehinds in regular expressions.
 * - We use a negative lookahead for the end of the word and it works great.
 * - If we could use a negative lookbehind for the start of the word we wouldn't need to create a function that offsets the start.
 */
export class WordRegExp extends RegExp {
	constructor(pattern) {
		// The `lookbehind` group consumes the (non-letter, non-number) character before the word; `exec()` trims it off again.
		super(`(?<lookbehind>^|[^\\p{L}\\p{N}])${pattern}(?![\\p{L}\\p{N}])`, "u");
	}
	/** Run the expression, returning a match whose `0` and `index` exclude the character consumed by the `lookbehind` group. */
	exec(input) {
		const match = super.exec(input);
		if (!match) return match;
		const lookbehind = match.groups ? match.groups.lookbehind : undefined;
		const offset = lookbehind ? lookbehind.length : 0;
		const zero = match[0];
		if (zero && offset) {
			match[0] = zero.slice(offset); // Slice off the start of the match to remove the matched first character.
			match.index += offset; // Increment the index to remove the matched first character.
		}
		return match;
	}
	/** Whether `input` contains the pattern as a complete word. */
	test(input) {
		return !!this.exec(input);
	}
}
|
package/markup/rules.d.ts
CHANGED
|
@@ -1,19 +1,14 @@
|
|
|
1
1
|
import type { Data } from "../util/data.js";
|
|
2
2
|
import type { JSXElement } from "../util/jsx.js";
|
|
3
3
|
import { NamedRegExp, NamedRegExpData } from "../util/regexp.js";
|
|
4
|
+
import { MarkupMatcher } from "./regexp.js";
|
|
4
5
|
import type { MarkupOptions } from "./options.js";
|
|
5
|
-
/** Subset of `NamedRegExpArray<T>` that are the only things we're required return from `match()` (because ) */
|
|
6
|
-
export declare type MarkupMatch<T extends Data | undefined> = {
|
|
7
|
-
0: string;
|
|
8
|
-
index: number;
|
|
9
|
-
groups: T;
|
|
10
|
-
};
|
|
11
6
|
/** Rule for parsing string markup into a JSX element. */
|
|
12
7
|
export interface MarkupRule<T extends Data | undefined = Data | undefined> {
|
|
13
8
|
/**
|
|
14
9
|
* Regular expression or custom matching function.
|
|
15
10
|
*/
|
|
16
|
-
readonly match: (T extends undefined ? RegExp : T extends NamedRegExpData ? NamedRegExp<T> : never) |
|
|
11
|
+
readonly match: (T extends undefined ? RegExp : T extends NamedRegExpData ? NamedRegExp<T> : never) | MarkupMatcher<T>;
|
|
17
12
|
/**
|
|
18
13
|
* Render the JSX element for this rule using the props matched by
|
|
19
14
|
*/
|
|
@@ -48,22 +43,18 @@ export declare type MarkupRules = AnyMarkupRule[];
|
|
|
48
43
|
* - Same as Markdown syntax.
|
|
49
44
|
* - Markdown's underline syntax is not supported (for simplification).
|
|
50
45
|
*/
|
|
51
|
-
export declare const MATCH_HEADING: NamedRegExp<{
|
|
52
|
-
prefix: string;
|
|
53
|
-
heading: string;
|
|
54
|
-
}>;
|
|
55
46
|
export declare const HEADING_RULE: MarkupRule<{
|
|
56
|
-
|
|
47
|
+
prefix: string;
|
|
57
48
|
heading: string;
|
|
58
49
|
}>;
|
|
59
50
|
/**
|
|
60
|
-
*
|
|
51
|
+
* Separator (horizontal rule / thematic break).
|
|
61
52
|
* - Same as Markdown syntax but also allows `•` bullet character (in addition to `-` dash, `+` plus, `*` asterisk, `_` underscore).
|
|
62
53
|
* - Character must be repeated three (or more) times.
|
|
63
54
|
* - Character must be the same every time (can't mix)
|
|
64
55
|
* - Might have infinite number of spaces between the characters.
|
|
65
56
|
*/
|
|
66
|
-
export declare const
|
|
57
|
+
export declare const SEPARATOR_RULE: MarkupRule;
|
|
67
58
|
export declare const UNORDERED_RULE: MarkupRule<{
|
|
68
59
|
list: string;
|
|
69
60
|
}>;
|
|
@@ -81,6 +72,7 @@ export declare const BLOCKQUOTE_RULE: MarkupRule<{
|
|
|
81
72
|
* - Markdown-style four-space indent syntax is not supported (only fenced code, since it's easier to use).
|
|
82
73
|
*/
|
|
83
74
|
export declare const FENCED_CODE_RULE: MarkupRule<{
|
|
75
|
+
wrap: string;
|
|
84
76
|
title?: string;
|
|
85
77
|
code: string;
|
|
86
78
|
}>;
|
|
@@ -99,7 +91,7 @@ export declare const PARAGRAPH_RULE: MarkupRule<{
|
|
|
99
91
|
* - For security only schemes that appear in `options.schemes` will match (defaults to `http:` and `https:`).
|
|
100
92
|
*/
|
|
101
93
|
export declare const URL_CHAR = "[-$_@.&!*,=;/#?:%a-zA-Z0-9]";
|
|
102
|
-
export declare const
|
|
94
|
+
export declare const URL_REGEXP: NamedRegExp<{
|
|
103
95
|
title?: string;
|
|
104
96
|
href: string;
|
|
105
97
|
}>;
|
|
@@ -115,7 +107,7 @@ export declare const URL_RULE: MarkupRule<{
|
|
|
115
107
|
* - If link is not valid (using `new URL(url)` then unparsed text will be returned.
|
|
116
108
|
* - For security only `http://` or `https://` links will work (if invalid the unparsed text will be returned).
|
|
117
109
|
*/
|
|
118
|
-
export declare const
|
|
110
|
+
export declare const LINK_REGEXP: NamedRegExp<{
|
|
119
111
|
title: string;
|
|
120
112
|
href: string;
|
|
121
113
|
}>;
|
|
@@ -131,50 +123,33 @@ export declare const LINK_RULE: MarkupRule<{
|
|
|
131
123
|
* - Same as Markdown syntax.
|
|
132
124
|
*/
|
|
133
125
|
export declare const CODE_RULE: MarkupRule<{
|
|
134
|
-
|
|
126
|
+
code: string;
|
|
135
127
|
}>;
|
|
136
128
|
/**
|
|
137
|
-
* Inline strong.
|
|
138
|
-
* - Inline text wrapped in one or more `*` asterisks.
|
|
139
|
-
* -
|
|
129
|
+
* Inline strong, emphasis, insert, delete, highlight.
|
|
130
|
+
* - Inline strong text wrapped in one or more `*` asterisks.
|
|
131
|
+
* - Inline emphasis text wrapped in one or more `_` underscores.
|
|
132
|
+
* - Inline inserted text wrapped in one or more `+` pluses.
|
|
133
|
+
* - Inline deleted text wrapped in one or more `-` minuses or `~` tildes.
|
|
134
|
+
* - Inline highlighted text wrapped in one or more `=` equals or `:` colons.
|
|
140
135
|
* - Whitespace cannot be the first or last character of the element (e.g. `* abc *` will not work).
|
|
136
|
+
* - Closing chars must match opening characters.
|
|
137
|
+
* - Cannot occur in the middle of a word (e.g. `this*that*this` will not work).
|
|
141
138
|
* - Closing characters must exactly match opening characters.
|
|
142
139
|
* - Different to Markdown: strong is always surrounded by `*asterisks*` and emphasis is always surrounded by `_underscores_` (strong isn't 'double emphasis').
|
|
143
140
|
*/
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
text: string;
|
|
157
|
-
}>;
|
|
158
|
-
/**
|
|
159
|
-
* Inserted text (`<ins>` tag),
|
|
160
|
-
* - Inline text wrapped in two or more `++` pluses.
|
|
161
|
-
* - Works inside words (e.g. `magi++karp++carp`).
|
|
162
|
-
* - Whitespace cannot be the first or last character of the element (e.g. `+ abc +` will not work).
|
|
163
|
-
* - Closing characters must exactly match opening characters.
|
|
164
|
-
* - Markdown doesn't have this.
|
|
165
|
-
*/
|
|
166
|
-
export declare const INSERT_RULE: MarkupRule<{
|
|
167
|
-
text: string;
|
|
168
|
-
}>;
|
|
169
|
-
/**
|
|
170
|
-
* Deleted text (`<del>` tag),
|
|
171
|
-
* - Inline text wrapped in two or more `--` hyphens or `~~` tildes.
|
|
172
|
-
* - Works inside words (e.g. `magi--karp--carp`).
|
|
173
|
-
* - Whitespace cannot be the first or last character of the element (e.g. `-- abc --` will not work).
|
|
174
|
-
* - Closing characters must exactly match opening characters.
|
|
175
|
-
* - Markdown doesn't have this.
|
|
176
|
-
*/
|
|
177
|
-
export declare const DELETE_RULE: MarkupRule<{
|
|
141
|
+
declare const INLINE_CHARS: {
|
|
142
|
+
"-": string;
|
|
143
|
+
"~": string;
|
|
144
|
+
"+": string;
|
|
145
|
+
"*": string;
|
|
146
|
+
_: string;
|
|
147
|
+
"=": string;
|
|
148
|
+
":": string;
|
|
149
|
+
};
|
|
150
|
+
export declare const INLINE_RULE: MarkupRule<{
|
|
151
|
+
char: keyof typeof INLINE_CHARS;
|
|
152
|
+
wrap: string;
|
|
178
153
|
text: string;
|
|
179
154
|
}>;
|
|
180
155
|
/**
|
|
@@ -202,3 +177,4 @@ export declare const MARKUP_RULES_BLOCK: MarkupRules;
|
|
|
202
177
|
export declare const MARKUP_RULES_INLINE: MarkupRules;
|
|
203
178
|
/** Subset of markup rules that are relevant for collapsed shortform content. */
|
|
204
179
|
export declare const MARKUP_RULES_SHORTFORM: MarkupRules;
|
|
180
|
+
export {};
|
package/markup/rules.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/* eslint-disable import/export */
|
|
2
|
-
import {
|
|
2
|
+
import { getRegExp } from "../util/regexp.js";
|
|
3
3
|
import { formatURL, getOptionalURL } from "../util/url.js";
|
|
4
|
+
import { getBlockRegExp, getLineRegExp, BLOCK_REGEXP, LINE_REGEXP, WordRegExp } from "./regexp.js";
|
|
4
5
|
/** React security symbol — see https://github.com/facebook/react/pull/4832 */
|
|
5
6
|
const $$typeof = Symbol.for("react.element");
|
|
6
7
|
/**
|
|
@@ -9,18 +10,10 @@ const $$typeof = Symbol.for("react.element");
|
|
|
9
10
|
* - Same as Markdown syntax.
|
|
10
11
|
* - Markdown's underline syntax is not supported (for simplification).
|
|
11
12
|
*/
|
|
12
|
-
export const MATCH_HEADING = getLineRegExp(`(?<prefix>#{1,6}) +(?<heading>${MATCH_LINE.source})`);
|
|
13
13
|
export const HEADING_RULE = {
|
|
14
|
-
match:
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const { index, 0: first, groups } = match;
|
|
18
|
-
const { prefix, heading } = groups;
|
|
19
|
-
return { index, 0: first, groups: { level: prefix.length, heading } };
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
render: ({ level, heading }) => ({
|
|
23
|
-
type: `h${level}`,
|
|
14
|
+
match: getLineRegExp(`(?<prefix>#{1,6}) +(?<heading>${LINE_REGEXP.source})`),
|
|
15
|
+
render: ({ prefix, heading }) => ({
|
|
16
|
+
type: `h${prefix.length}`,
|
|
24
17
|
key: null,
|
|
25
18
|
ref: null,
|
|
26
19
|
$$typeof,
|
|
@@ -30,13 +23,13 @@ export const HEADING_RULE = {
|
|
|
30
23
|
subcontext: "inline",
|
|
31
24
|
};
|
|
32
25
|
/**
|
|
33
|
-
*
|
|
26
|
+
* Separator (horizontal rule / thematic break).
|
|
34
27
|
* - Same as Markdown syntax but also allows `•` bullet character (in addition to `-` dash, `+` plus, `*` asterisk, `_` underscore).
|
|
35
28
|
* - Character must be repeated three (or more) times.
|
|
36
29
|
* - Character must be the same every time (can't mix)
|
|
37
30
|
* - Might have infinite number of spaces between the characters.
|
|
38
31
|
*/
|
|
39
|
-
export const
|
|
32
|
+
export const SEPARATOR_RULE = {
|
|
40
33
|
match: getLineRegExp(`([-*•+_=])(?: *\\1){2,}`),
|
|
41
34
|
render: () => ({
|
|
42
35
|
type: "hr",
|
|
@@ -58,7 +51,7 @@ const UNORDERED_PREFIX = `[-*•+] +`;
|
|
|
58
51
|
const UNORDERED_SPLIT = new RegExp(`\n+${UNORDERED_PREFIX}`, "g");
|
|
59
52
|
const UNORDERED_INDENT = /^\t/gm;
|
|
60
53
|
export const UNORDERED_RULE = {
|
|
61
|
-
match: getBlockRegExp(`${UNORDERED_PREFIX}(?<list>${
|
|
54
|
+
match: getBlockRegExp(`${UNORDERED_PREFIX}(?<list>${BLOCK_REGEXP.source})`),
|
|
62
55
|
render: ({ list }) => ({
|
|
63
56
|
type: "ul",
|
|
64
57
|
key: null,
|
|
@@ -85,7 +78,7 @@ const ORDERED_PREFIX = "[1-9][0-9]{0,8}[.):] +"; // Number for a numbered list,
|
|
|
85
78
|
const ORDERED_SPLIT = new RegExp(`\n+(?=${ORDERED_PREFIX})`, "g");
|
|
86
79
|
const ORDERED_INDENT = UNORDERED_INDENT;
|
|
87
80
|
export const ORDERED_RULE = {
|
|
88
|
-
match: getBlockRegExp(`(?<list>${ORDERED_PREFIX}${
|
|
81
|
+
match: getBlockRegExp(`(?<list>${ORDERED_PREFIX}${BLOCK_REGEXP.source})`),
|
|
89
82
|
render: ({ list }) => ({
|
|
90
83
|
type: "ol",
|
|
91
84
|
key: null,
|
|
@@ -118,7 +111,7 @@ const _mapOrdered = (item, key) => ({
|
|
|
118
111
|
const BLOCKQUOTE_PREFIX = "> *";
|
|
119
112
|
const BLOCKQUOTE_INDENT = new RegExp(`^${BLOCKQUOTE_PREFIX}`, "gm");
|
|
120
113
|
export const BLOCKQUOTE_RULE = {
|
|
121
|
-
match: getLineRegExp(`(?<quote>${BLOCKQUOTE_PREFIX}${
|
|
114
|
+
match: getLineRegExp(`(?<quote>${BLOCKQUOTE_PREFIX}${LINE_REGEXP.source}(?:\n${BLOCKQUOTE_PREFIX}${LINE_REGEXP.source})*)`),
|
|
122
115
|
render: ({ quote }) => ({
|
|
123
116
|
type: "blockquote",
|
|
124
117
|
key: null,
|
|
@@ -138,7 +131,7 @@ export const BLOCKQUOTE_RULE = {
|
|
|
138
131
|
*/
|
|
139
132
|
export const FENCED_CODE_RULE = {
|
|
140
133
|
// Matcher has its own end that only stops when it reaches a matching closing fence or the end of the string.
|
|
141
|
-
match:
|
|
134
|
+
match: getLineRegExp(`(?<wrap>\`{3,}|~{3,}) *(?<title>${LINE_REGEXP.source})\n(?<code>${BLOCK_REGEXP.source})`, `(?:\n\\k<wrap>|$)`),
|
|
142
135
|
render: ({ title, code }) => ({
|
|
143
136
|
type: "pre",
|
|
144
137
|
key: null,
|
|
@@ -162,7 +155,7 @@ export const FENCED_CODE_RULE = {
|
|
|
162
155
|
* - When ordering rules, paragraph should go after other "block" context elements (because it has a very generous capture).
|
|
163
156
|
*/
|
|
164
157
|
export const PARAGRAPH_RULE = {
|
|
165
|
-
match: getBlockRegExp(`(?<paragraph>${
|
|
158
|
+
match: getBlockRegExp(`(?<paragraph>${BLOCK_REGEXP.source})`),
|
|
166
159
|
render: ({ paragraph }) => ({
|
|
167
160
|
type: `p`,
|
|
168
161
|
key: null,
|
|
@@ -182,9 +175,9 @@ export const PARAGRAPH_RULE = {
|
|
|
182
175
|
* - For security only schemes that appear in `options.schemes` will match (defaults to `http:` and `https:`).
|
|
183
176
|
*/
|
|
184
177
|
export const URL_CHAR = "[-$_@.&!*,=;/#?:%a-zA-Z0-9]";
|
|
185
|
-
export const
|
|
178
|
+
// Matches a bare URL: `href` is a lowercase scheme, a colon, then one or more `URL_CHAR` characters; it may be followed by spaces and a `(title)` in parentheses.
export const URL_REGEXP = new RegExp(`(?<href>[a-z]+:${URL_CHAR}+)(?: +(?:\\((?<title>[^)]*?)\\)))?`);
|
|
186
179
|
export const URL_RULE = {
|
|
187
|
-
match: (input, options) => _urlMatch(
|
|
180
|
+
match: (input, options) => _urlMatch(URL_REGEXP.exec(input), options),
|
|
188
181
|
render: ({ href, title }, { rel }) => ({
|
|
189
182
|
type: "a",
|
|
190
183
|
key: null,
|
|
@@ -213,10 +206,10 @@ function _urlMatch(match, { schemes, url: base }) {
|
|
|
213
206
|
* - If link is not valid (using `new URL(url)` then unparsed text will be returned.
|
|
214
207
|
* - For security only `http://` or `https://` links will work (if invalid the unparsed text will be returned).
|
|
215
208
|
*/
|
|
216
|
-
export const
|
|
209
|
+
// Matches Markdown-style `[title](href)`: `title` is everything up to the first `]` and `href` is everything up to the first `)`.
export const LINK_REGEXP = getRegExp(/\[(?<title>[^\]]*?)\]\((?<href>[^)]*?)\)/);
|
|
217
210
|
export const LINK_RULE = {
|
|
218
211
|
...URL_RULE,
|
|
219
|
-
match: (input, options) => _urlMatch(
|
|
212
|
+
match: (input, options) => _urlMatch(LINK_REGEXP.exec(input), options),
|
|
220
213
|
};
|
|
221
214
|
/**
|
|
222
215
|
* Inline code.
|
|
@@ -226,89 +219,35 @@ export const LINK_RULE = {
|
|
|
226
219
|
* - Same as Markdown syntax.
|
|
227
220
|
*/
|
|
228
221
|
export const CODE_RULE = {
|
|
229
|
-
match:
|
|
230
|
-
render: ({
|
|
222
|
+
match: new RegExp(`(?<wrap>\`+)(?<code>${BLOCK_REGEXP.source})\\k<wrap>`),
|
|
223
|
+
render: ({ code }) => ({
|
|
231
224
|
type: "code",
|
|
232
225
|
key: null,
|
|
233
226
|
ref: null,
|
|
234
227
|
$$typeof,
|
|
235
|
-
props: { children:
|
|
228
|
+
props: { children: code },
|
|
236
229
|
}),
|
|
237
230
|
contexts: ["inline", "list"],
|
|
238
231
|
priority: 10, // Higher priority than e.g. `strong` or `em` (from CommonMark spec: "Code span backticks have higher precedence than any other inline constructs except HTML tags and autolinks.")
|
|
239
232
|
};
|
|
240
233
|
/**
|
|
241
|
-
* Inline strong.
|
|
242
|
-
* - Inline text wrapped in one or more `*` asterisks.
|
|
243
|
-
* -
|
|
234
|
+
* Inline strong, emphasis, insert, delete, highlight.
|
|
235
|
+
* - Inline strong text wrapped in one or more `*` asterisks.
|
|
236
|
+
* - Inline emphasis text wrapped in one or more `_` underscores.
|
|
237
|
+
* - Inline inserted text wrapped in one or more `+` pluses.
|
|
238
|
+
* - Inline deleted text wrapped in one or more `-` minuses or `~` tildes.
|
|
239
|
+
* - Inline highlighted text wrapped in one or more `=` equals or `:` colons.
|
|
244
240
|
* - Whitespace cannot be the first or last character of the element (e.g. `* abc *` will not work).
|
|
241
|
+
* - Closing chars must match opening characters.
|
|
242
|
+
* - Cannot occur in the middle of a word (e.g. `this*that*this` will not work).
|
|
245
243
|
* - Closing characters must exactly match opening characters.
|
|
246
244
|
* - Different to Markdown: strong is always surrounded by `*asterisks*` and emphasis is always surrounded by `_underscores_` (strong isn't 'double emphasis').
|
|
247
245
|
*/
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
ref: null,
|
|
254
|
-
$$typeof,
|
|
255
|
-
props: { children: text },
|
|
256
|
-
}),
|
|
257
|
-
contexts: ["inline", "list", "link"],
|
|
258
|
-
subcontext: "inline",
|
|
259
|
-
};
|
|
260
|
-
/**
|
|
261
|
-
* Inline emphasis.
|
|
262
|
-
* - Inline text wrapped in one or more `_` underscore symbols.
|
|
263
|
-
* - Works inside words (e.g. `magi_carp_carp`).
|
|
264
|
-
* - Whitespace cannot be the first or last character of the element (e.g. `_ abc _` will not work).
|
|
265
|
-
* - Closing characters must exactly match opening characters.
|
|
266
|
-
* - Different to Markdown: strong is always surrounded by `*asterisks*` and emphasis is always surrounded by `_underscores_` (strong isn't 'double emphasis').
|
|
267
|
-
*/
|
|
268
|
-
export const EMPHASIS_RULE = {
|
|
269
|
-
match: getWrapRegExp("_+"),
|
|
270
|
-
render: ({ text }) => ({
|
|
271
|
-
type: "em",
|
|
272
|
-
key: null,
|
|
273
|
-
ref: null,
|
|
274
|
-
$$typeof,
|
|
275
|
-
props: { children: text },
|
|
276
|
-
}),
|
|
277
|
-
contexts: ["inline", "list", "link"],
|
|
278
|
-
subcontext: "inline",
|
|
279
|
-
};
|
|
280
|
-
/**
|
|
281
|
-
* Inserted text (`<ins>` tag),
|
|
282
|
-
* - Inline text wrapped in two or more `++` pluses.
|
|
283
|
-
* - Works inside words (e.g. `magi++karp++carp`).
|
|
284
|
-
* - Whitespace cannot be the first or last character of the element (e.g. `+ abc +` will not work).
|
|
285
|
-
* - Closing characters must exactly match opening characters.
|
|
286
|
-
* - Markdown doesn't have this.
|
|
287
|
-
*/
|
|
288
|
-
export const INSERT_RULE = {
|
|
289
|
-
match: getWrapRegExp("\\+\\++"),
|
|
290
|
-
render: ({ text }) => ({
|
|
291
|
-
type: "ins",
|
|
292
|
-
key: null,
|
|
293
|
-
ref: null,
|
|
294
|
-
$$typeof,
|
|
295
|
-
props: { children: text },
|
|
296
|
-
}),
|
|
297
|
-
contexts: ["inline", "list", "link"],
|
|
298
|
-
subcontext: "inline",
|
|
299
|
-
};
|
|
300
|
-
/**
|
|
301
|
-
* Deleted text (`<del>` tag),
|
|
302
|
-
* - Inline text wrapped in two or more `--` hyphens or `~~` tildes.
|
|
303
|
-
* - Works inside words (e.g. `magi--karp--carp`).
|
|
304
|
-
* - Whitespace cannot be the first or last character of the element (e.g. `-- abc --` will not work).
|
|
305
|
-
* - Closing characters must exactly match opening characters.
|
|
306
|
-
* - Markdown doesn't have this.
|
|
307
|
-
*/
|
|
308
|
-
export const DELETE_RULE = {
|
|
309
|
-
match: getWrapRegExp("--+|~~+"),
|
|
310
|
-
render: ({ text }) => ({
|
|
311
|
-
type: "del",
|
|
246
|
+
// Maps each inline wrapping character to the element type that `INLINE_RULE` renders for it.
const INLINE_CHARS = { "-": "del", "~": "del", "+": "ins", "*": "strong", "_": "em", "=": "mark", ":": "mark" }; // Hyphen must be first so it works when we use the keys as a character class.
|
|
247
|
+
export const INLINE_RULE = {
|
|
248
|
+
match: new WordRegExp(`(?<wrap>(?<char>[${Object.keys(INLINE_CHARS).join("")}])+)(?<text>(?!\\k<char>)\\S|(?!\\k<char>)\\S[\\s\\S]*?(?!\\k<char>)\\S)\\k<wrap>`),
|
|
249
|
+
render: ({ char, text }) => ({
|
|
250
|
+
type: INLINE_CHARS[char],
|
|
312
251
|
key: null,
|
|
313
252
|
ref: null,
|
|
314
253
|
$$typeof,
|
|
@@ -348,7 +287,7 @@ export const LINEBREAK_RULE = {
|
|
|
348
287
|
*/
|
|
349
288
|
export const MARKUP_RULES = [
|
|
350
289
|
HEADING_RULE,
|
|
351
|
-
|
|
290
|
+
SEPARATOR_RULE,
|
|
352
291
|
UNORDERED_RULE,
|
|
353
292
|
ORDERED_RULE,
|
|
354
293
|
BLOCKQUOTE_RULE,
|
|
@@ -357,16 +296,14 @@ export const MARKUP_RULES = [
|
|
|
357
296
|
LINK_RULE,
|
|
358
297
|
URL_RULE,
|
|
359
298
|
CODE_RULE,
|
|
360
|
-
|
|
361
|
-
EMPHASIS_RULE,
|
|
362
|
-
INSERT_RULE,
|
|
363
|
-
DELETE_RULE,
|
|
299
|
+
INLINE_RULE,
|
|
364
300
|
LINEBREAK_RULE,
|
|
301
|
+
//
|
|
365
302
|
];
|
|
366
303
|
/** Subset of markup rules that work in a block context. */
|
|
367
304
|
export const MARKUP_RULES_BLOCK = [
|
|
368
305
|
HEADING_RULE,
|
|
369
|
-
|
|
306
|
+
SEPARATOR_RULE,
|
|
370
307
|
UNORDERED_RULE,
|
|
371
308
|
ORDERED_RULE,
|
|
372
309
|
BLOCKQUOTE_RULE,
|
|
@@ -379,10 +316,7 @@ export const MARKUP_RULES_INLINE = [
|
|
|
379
316
|
LINK_RULE,
|
|
380
317
|
URL_RULE,
|
|
381
318
|
CODE_RULE,
|
|
382
|
-
|
|
383
|
-
EMPHASIS_RULE,
|
|
384
|
-
INSERT_RULE,
|
|
385
|
-
DELETE_RULE,
|
|
319
|
+
INLINE_RULE,
|
|
386
320
|
LINEBREAK_RULE,
|
|
387
321
|
//
|
|
388
322
|
];
|
|
@@ -394,10 +328,7 @@ export const MARKUP_RULES_SHORTFORM = [
|
|
|
394
328
|
LINK_RULE,
|
|
395
329
|
URL_RULE,
|
|
396
330
|
CODE_RULE,
|
|
397
|
-
|
|
398
|
-
EMPHASIS_RULE,
|
|
399
|
-
INSERT_RULE,
|
|
400
|
-
DELETE_RULE,
|
|
331
|
+
INLINE_RULE,
|
|
401
332
|
LINEBREAK_RULE,
|
|
402
333
|
//
|
|
403
334
|
];
|
package/package.json
CHANGED
package/util/debug.js
CHANGED
|
@@ -31,10 +31,10 @@ export function debug(value) {
|
|
|
31
31
|
return typeof value;
|
|
32
32
|
}
|
|
33
33
|
/** Debug a string. */
|
|
34
|
-
export const debugString = (value) => `"${value.replace(
|
|
35
|
-
const
|
|
36
|
-
const
|
|
37
|
-
const
|
|
34
|
+
export const debugString = (value) => {
	// Escape the special characters first, then wrap the escaped text in double quotes.
	const escaped = value.replace(ESCAPE_REGEXP, _escapeChar);
	return `"${escaped}"`;
};
|
|
35
|
+
const ESCAPE_REGEXP = /[\x00-\x08\x0B-\x1F\x7F-\x9F"\\]/g; // Match control characters, `"` double quote, `\` backslash.
|
|
36
|
+
// Short escape sequences for the characters that have one; `_escapeChar()` falls back to a `\xNN` hex escape for anything not listed here.
const ESCAPE_LIST = { '"': '\\"', "\\": "\\\\", "\r": "\\r", "\n": "\\n", "\t": "\\t", "\b": "\\b", "\f": "\\f", "\v": "\\v" };
|
|
37
|
+
const _escapeChar = (char) => {
	// Prefer the short named escape when one is listed for this character.
	const named = ESCAPE_LIST[char];
	if (named) return named;
	// Otherwise emit a hex escape of the character's first code unit, padded to at least two digits.
	const hex = char.charCodeAt(0).toString(16).padStart(2, "00");
	return `\\x${hex}`;
};
|
|
38
38
|
/** Debug an array. */
|
|
39
39
|
export function debugArray(value) {
|
|
40
40
|
const prototype = Object.getPrototypeOf(value);
|
package/util/regexp.d.ts
CHANGED
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
import { Match } from "./match.js";
|
|
2
2
|
import { NotString } from "./string.js";
|
|
3
|
-
|
|
4
|
-
export declare const
|
|
5
|
-
|
|
6
|
-
export declare const
|
|
7
|
-
export declare const MATCH_BLOCK_START: RegExp;
|
|
8
|
-
export declare const MATCH_BLOCK_END: RegExp;
|
|
9
|
-
export declare const MATCH_TEXT: RegExp;
|
|
10
|
-
export declare const MATCH_ALWAYS: RegExp;
|
|
11
|
-
export declare const MATCH_NEVER: RegExp;
|
|
3
|
+
/** Regular expression that always matches everything. */
|
|
4
|
+
export declare const ALWAYS_REGEXP: RegExp;
|
|
5
|
+
/** Regular expression that never matches anything. */
|
|
6
|
+
export declare const NEVER_REGEXP: RegExp;
|
|
12
7
|
/** Things that can be converted to a regular expression. */
|
|
13
8
|
export declare type PossibleRegExp = string | RegExp;
|
|
14
9
|
/** Is an unknown value a `RegExp` instance? */
|
|
@@ -27,25 +22,13 @@ export declare type NamedRegExpData = {
|
|
|
27
22
|
};
|
|
28
23
|
/** Regular expression match array that you've asserted contains the specified named groups. */
|
|
29
24
|
export interface NamedRegExpArray<T extends NamedRegExpData = NamedRegExpData> extends RegExpExecArray {
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
0: string;
|
|
26
|
+
groups: T;
|
|
32
27
|
}
|
|
33
28
|
/** Regular expression that you've asserted contains the specified named capture groups. */
|
|
34
29
|
export interface NamedRegExp<T extends NamedRegExpData = NamedRegExpData> extends RegExp {
|
|
35
30
|
exec(input: string): NamedRegExpArray<T> | null;
|
|
36
31
|
}
|
|
37
|
-
/** Create a named regular expression (note: this is unsafe). */
|
|
38
|
-
export declare const getNamedRegExp: <T extends NamedRegExpData>(pattern: string | RegExp, flags?: string) => NamedRegExp<T>;
|
|
39
|
-
/** Create regular expression that matches a block of content (possibly asserting that it contains named match groups). */
|
|
40
|
-
export declare function getBlockRegExp<T extends NamedRegExpData>(middle: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp, flags?: string): NamedRegExp<T>;
|
|
41
|
-
export declare function getBlockRegExp(middle: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp, flags?: string): RegExp;
|
|
42
|
-
/** Create regular expression that matches a line of content (possibly asserting that it contains named match groups). */
|
|
43
|
-
export declare function getLineRegExp<T extends NamedRegExpData>(middle: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp, flags?: string): NamedRegExp<T>;
|
|
44
|
-
export declare function getLineRegExp(middle: PossibleRegExp, end?: PossibleRegExp, start?: PossibleRegExp, flags?: string): RegExp;
|
|
45
|
-
/** Create regular expression that matches piece of text wrapped by another expression (use `text` match group). */
|
|
46
|
-
export declare function getWrapRegExp(wrapper: PossibleRegExp, middle?: PossibleRegExp, flags?: string): NamedRegExp<{
|
|
47
|
-
text: string;
|
|
48
|
-
}>;
|
|
49
32
|
/** Create regular expression that matches any of a list of other expressions. */
|
|
50
33
|
export declare function getAnyRegExp(patterns: Iterable<PossibleRegExp> & NotString, flags?: string): RegExp;
|
|
51
34
|
/** Create regular expression that matches all of a list of other expressions. */
|
package/util/regexp.js
CHANGED
|
@@ -1,15 +1,9 @@
|
|
|
1
1
|
import { AssertionError } from "../error/AssertionError.js";
|
|
2
2
|
import { getArray } from "./array.js";
|
|
3
|
-
|
|
4
|
-
export const
|
|
5
|
-
|
|
6
|
-
export const
|
|
7
|
-
export const MATCH_BLOCK = /[\s\S]*?/; // Match block of content (including newlines so don't be greedy).
|
|
8
|
-
export const MATCH_BLOCK_START = /^\n*|\n+/; // Starts at start of a block (one or more linebreak or start of string).
|
|
9
|
-
export const MATCH_BLOCK_END = /\n*$|\n\n+/; // End of a block (two or more linebreaks or end of string).
|
|
10
|
-
export const MATCH_TEXT = /\S(?:[\s\S]*?\S)?/; // Run of text that starts and ends with non-space characters (possibly multi-line).
|
|
11
|
-
export const MATCH_ALWAYS = /^.*$/; // Regular expression that always matches.
|
|
12
|
-
export const MATCH_NEVER = /^(?=a)a/; // Regular expression that never matches.
|
|
3
|
+
/** Regular expression that always matches everything. */
|
|
4
|
+
export const ALWAYS_REGEXP = /^[\s\S]*$/; // Uses `[\s\S]` rather than `.` because `.` skips line breaks, which made multi-line strings fail to match.
|
|
5
|
+
/** Regular expression that never matches anything. */
|
|
6
|
+
export const NEVER_REGEXP = /(?!)/; // Empty negative lookahead fails at every position (the previous `(?=a)a` form matched any string starting with `a`).
|
|
13
7
|
/** Is an unknown value a `RegExp` instance? */
|
|
14
8
|
export const isRegExp = (v) => {
    // `instanceof` walks the prototype chain, so instances of `RegExp` subclasses pass too.
    return v instanceof RegExp;
};
|
|
15
9
|
/** Assert that an unknown value is a `RegExp` instance. */
|
|
@@ -24,24 +18,12 @@ export const getRegExpSource = (regexp) => (typeof regexp === "string" ? regexp
|
|
|
24
18
|
/** Escape special characters in a string regular expression. */
|
|
25
19
|
export const escapeRegExp = (pattern) => pattern.replace(REPLACE_ESCAPED, (char) => `\\${char}`);
// Every character with special meaning in a pattern. Declared after its use, which is fine because it is only read when `escapeRegExp()` is called.
const REPLACE_ESCAPED = /[-[\]/{}()*+?.\\^$|]/g;
|
|
27
|
-
/** Create a named regular expression (note: this is unsafe). */
|
|
28
|
-
export const getNamedRegExp = (pattern, flags) => (typeof pattern === "string" ? new RegExp(pattern, flags) : pattern);
|
|
29
|
-
export function getBlockRegExp(middle = MATCH_BLOCK, end = MATCH_BLOCK_END, start = MATCH_BLOCK_START, flags) {
|
|
30
|
-
return new RegExp(`(?:${getRegExpSource(start)})(?:${getRegExpSource(middle)})(?:${getRegExpSource(end)})`, flags);
|
|
31
|
-
}
|
|
32
|
-
export function getLineRegExp(middle = MATCH_LINE, end = MATCH_LINE_END, start = MATCH_LINE_START, flags) {
|
|
33
|
-
return new RegExp(`(?:${getRegExpSource(start)})(?:${getRegExpSource(middle)})(?:${getRegExpSource(end)})`, flags);
|
|
34
|
-
}
|
|
35
|
-
/** Create regular expression that matches piece of text wrapped by another expression (use `text` match group). */
|
|
36
|
-
export function getWrapRegExp(wrapper, middle = MATCH_TEXT, flags) {
|
|
37
|
-
return getNamedRegExp(`(${getRegExpSource(wrapper)})(?<text>${getRegExpSource(middle)})\\1`, flags);
|
|
38
|
-
}
|
|
39
21
|
/** Create regular expression that matches any of a list of other expressions. */
|
|
40
22
|
export function getAnyRegExp(patterns, flags) {
    // Drop falsy entries (e.g. empty strings) once; the filtered list is reused for both the emptiness check and the final pattern.
    const arr = getArray(patterns).filter(Boolean);
    // If there are no patterns to match against then _no_ string can ever match against any of nothing.
    if (!arr.length)
        return NEVER_REGEXP;
    // Create RegExp using multiple joined matches like `(?:AAA)|(?:BBB)`
    // Built from `arr` rather than re-reading `patterns`, so the falsy entries filtered out above never reach `getRegExpSource()`.
    return new RegExp(`(?:${arr.map(getRegExpSource).join(")|(?:")})`, flags);
}
|
|
@@ -50,7 +32,7 @@ export function getAllRegExp(patterns, flags) {
|
|
|
50
32
|
const arr = getArray(patterns).filter(Boolean);
|
|
51
33
|
// If there are no patterns to match against then _every_ string will match against the entire list of nothing.
|
|
52
34
|
if (!arr.length)
|
|
53
|
-
return
|
|
35
|
+
return ALWAYS_REGEXP;
|
|
54
36
|
// Create RegExp using multiple lookaheads like `^(?=.*?(?:AAA))(?=.*?(?:BBB))`
|
|
55
37
|
return new RegExp(`^(?=.*?(?:${getArray(patterns).map(getRegExpSource).join("))(?=.*?(?:")}))`, flags);
|
|
56
38
|
}
|
package/util/string.d.ts
CHANGED
|
@@ -41,6 +41,7 @@ export declare const joinStrings: (strs: Iterable<string> & NotString, joiner?:
|
|
|
41
41
|
* - Remove all control characters
|
|
42
42
|
* - Normalise runs of whitespace to one ` ` space,
|
|
43
43
|
* - Trim whitespace from the start and end of the string.
|
|
44
|
+
*
|
|
44
45
|
* @example sanitizeString("\x00Nice! "); // Returns `"Nice!"`
|
|
45
46
|
*/
|
|
46
47
|
export declare const sanitizeString: (str: string) => string;
|
|
@@ -54,7 +55,7 @@ export declare const sanitizeString: (str: string) => string;
|
|
|
54
55
|
* - Allow spaces at the start of each line (for indentation) but trim the end of each line.
|
|
55
56
|
* - Trim excess newlines at the start and end of the string and runs of more than two newlines in a row.
|
|
56
57
|
*
|
|
57
|
-
* @todo Use lookbehind when Safari supports it
|
|
58
|
+
* @todo Use lookbehind when Safari supports it, so replacements don't need `$1`
|
|
58
59
|
*/
|
|
59
60
|
export declare const sanitizeLines: (str: string) => string;
|
|
60
61
|
/**
|
package/util/string.js
CHANGED
|
@@ -53,29 +53,18 @@ export function getString(value) {
|
|
|
53
53
|
}
|
|
54
54
|
/** Concatenate an iterable set of strings together. */
|
|
55
55
|
export const joinStrings = (strs, joiner = "") => {
    // Convert the iterable with `getArray()` first so that `.join()` is available.
    const list = getArray(strs);
    return list.join(joiner);
};
|
|
56
|
-
// Regular expressions.
|
|
57
|
-
const MATCH_CONTROL_CHARS = /[\x00-\x1F\x7F-\x9F]/g; // Match control characters.
|
|
58
|
-
const MATCH_LINE_CONTROL_CHARS = /[\x00-\x08\x0B-\x1F\x7F-\x9F]/g; // Match control characters except `\n` newline and `\t` tab.
|
|
59
|
-
const MATCH_PARAGRAPH_SEPARATOR = /\n\n+|\f|\u2029/g; // Match indications of paragraph separation.
|
|
60
|
-
const MATCH_LINE_SEPARATOR = /\r\n?|\n|\v|\x85|\u2028/g; // Match indications of line separation.
|
|
61
|
-
const MATCH_WORD_SEPARATOR = /[\s\p{P}\p{S}\p{Z}]+/gu; // Match indications of word separation.
|
|
62
|
-
const MATCH_WHITESPACE = /\s+/g; // Match runs of whitespace characters.
|
|
63
|
-
const MATCH_TRAILING_WHITESPACE = /[^\S\n]+(?=\n)|\s+$/g; // Trailing whitespace at the end of a line or the whole string.
|
|
64
|
-
const MATCH_NON_TEXT = /[^\p{L}\p{N} ]+/gu; // Match any characters that isn't a letter, number, or ` ` space.
|
|
65
|
-
const MATCH_LEADING_NEWLINES = /^\n+/g; // `\n` newline characters at the start of the string.
|
|
66
|
-
// const MATCH_TRAILING_NEWLINES = /\n+$/g; // `\n` newline characters at the end of the string.
|
|
67
|
-
const MATCH_FOUR_SPACES = / {4}/g; // Match a run of four whitespace characters.
|
|
68
56
|
/**
|
|
69
57
|
* Sanitize a single-line string.
|
|
70
58
|
* - Used when you're sanitising a single-line input, e.g. a title for something.
|
|
71
59
|
* - Remove all control characters
|
|
72
60
|
* - Normalise runs of whitespace to one ` ` space,
|
|
73
61
|
* - Trim whitespace from the start and end of the string.
|
|
62
|
+
*
|
|
74
63
|
* @example sanitizeString("\x00Nice! "); // Returns `"Nice!"`
|
|
75
64
|
*/
|
|
76
65
|
export const sanitizeString = (str) => {
    // Strip control characters (anything in Unicode category `C` that is not whitespace).
    const withoutControls = str.replace(/[^\P{C}\s]/gu, "");
    // Collapse each run of whitespace (including tabs and newlines) to one ` ` space.
    const singleSpaced = withoutControls.replace(/\s+/gu, " ");
    // Trim whitespace from the start and end of the string.
    return singleSpaced.trim();
};
|
|
80
69
|
/**
|
|
81
70
|
* Sanitize a multiline string.
|
|
@@ -87,17 +76,18 @@ export const sanitizeString = (str) => str
|
|
|
87
76
|
* - Allow spaces at the start of each line (for indentation) but trim the end of each line.
|
|
88
77
|
* - Trim excess newlines at the start and end of the string and runs of more than two newlines in a row.
|
|
89
78
|
*
|
|
90
|
-
* @todo Use lookbehind when Safari supports it
|
|
79
|
+
* @todo Use lookbehind when Safari supports it, so replacements don't need `$1`
|
|
91
80
|
*/
|
|
92
81
|
export const sanitizeLines = (str) => str
    .replace(/[^\P{C}\s\x85]/gu, "") // Strip control characters (except whitespace, and except `\x85` next-line which `\s` does not cover but which is a line separator normalised in the next step).
    .replace(/\r\n?|\v|\x85|\u2028/g, "\n") // Normalise line separators to `\n` newline
    .replace(/\f|\u2029/g, "\n\n") // Normalise paragraph separators to `\n\n` double newline (done before trimming, because `\f` and `\u2029` are themselves whitespace and would otherwise be trimmed or leave trailing spaces behind).
    .replace(/[^\S\n]+(?=\n|$)/g, "") // Trim trailing whitespace on each line.
    .replace(/^\n+|\n+$/g, "") // Trim leading and trailing newlines.
    .replace(/\n{3,}/g, "\n\n") // Normalise `\n\n\n` triple newline (or more) to `\n\n` double newline.
    .replace(/(\S)[^\S\n]+(?=\S)/g, "$1 ") // Normalise runs of whitespace in the middle of each line to one ` ` space.
    .replace(/ {4}/g, "\t") // Normalise runs of ` ` four spaces to a single `\t` tab (this will only exist in indentation because we already stripped it in other places).
    .replace(/(^|\n|\t) +/g, "$1"); // Remove runs of ` ` space in indentation (will only match three or fewer because four spaces have already been normalised to `\t` tab).
|
|
101
91
|
/**
|
|
102
92
|
* Simplify a string by removing anything that isn't a number, letter, or space.
|
|
103
93
|
* - Used when you're running a query against a string entered by a user.
|
|
@@ -106,8 +96,8 @@ export const sanitizeLines = (str) => str
|
|
|
106
96
|
*/
|
|
107
97
|
export const simplifyString = (str) => str
    .normalize("NFD") // Canonically decompose letters with marks (e.g. `u` with a diaeresis) into a base letter plus a separate combining mark; the marks are stripped below.
    .replace(/[\s\p{P}\p{S}\p{Z}]+/gu, " ") // Normalise word separators to ` ` space.
    .replace(/[^\p{L}\p{N} ]+/gu, "") // Strip characters that aren't letters, numbers, spaces.
    .replace(/ {2,}/g, " ") // Stripping a character that sat between two separators (e.g. `"a \x00 b"`) leaves adjacent spaces, so collapse them again.
    .trim()
    .toLowerCase();
|
|
113
103
|
/**
|
|
@@ -117,7 +107,7 @@ export const simplifyString = (str) => str
|
|
|
117
107
|
*
|
|
118
108
|
* Note: this splits words based on spaces, so won't work well with logographic writing systems e.g. kanji.
|
|
119
109
|
*/
|
|
120
|
-
export const getSlug = (str) => simplifyString(str).replace(
|
|
110
|
+
export const getSlug = (str) => {
    // Simplify first, then turn every ` ` space in the result into a `-` hyphen.
    const simplified = simplifyString(str);
    return simplified.replace(/ /g, "-");
};
|
|
121
111
|
/**
|
|
122
112
|
* Return an array of the separate words and "quoted phrases" found in a string.
|
|
123
113
|
* - Phrases enclosed "in quotes" are a single word.
|
|
@@ -132,14 +122,14 @@ export const getWords = (str) => Array.from(yieldWords(str));
|
|
|
132
122
|
* Note: this splits words based on spaces, so won't work well with logographic writing systems e.g. kanji.
|
|
133
123
|
*/
|
|
134
124
|
export function* yieldWords(str) {
    for (const match of str.matchAll(WORD)) {
        // Group 1 is a bare word and group 2 is the inside of a "quoted phrase"; each match sets at most one of them.
        const found = match[2] || match[1];
        // An empty phrase (`""`) sets neither to a non-empty value, so nothing is yielded for it.
        if (found)
            yield found;
    }
}
// Runs of characters without spaces or `"`, or "quoted phrases".
// There is no `'single quoted'` alternative: `'` is accepted by the bare-word alternative (so apostrophes stay inside words), which means a separate `'…'` alternative could never match.
const WORD = /([^\s"]+)|"([^"]*)"/g;
|
|
143
133
|
/** Is the first character of a string an uppercase letter? */
|
|
144
134
|
export const isUppercaseLetter = (str) => {
    // Code unit of the first character (`NaN` when the string is empty).
    const code = str.charCodeAt(0);
    // 65 to 90 are the code units of `A` to `Z`.
    return isBetween(code, 65, 90);
};
|
|
145
135
|
/** Is the first character of a string a lowercase letter? */
|