@yozora/tokenizer-emphasis 2.1.2 → 2.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/src/index.ts +0 -9
- package/src/match.ts +0 -278
- package/src/parse.ts +0 -16
- package/src/tokenizer.ts +0 -32
- package/src/types.ts +0 -33
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-emphasis",
|
|
3
|
-
"version": "2.1.2",
|
|
3
|
+
"version": "2.1.4",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
},
|
|
28
28
|
"files": [
|
|
29
29
|
"lib/",
|
|
30
|
-
"
|
|
30
|
+
"lib/**/*.map",
|
|
31
31
|
"package.json",
|
|
32
32
|
"CHANGELOG.md",
|
|
33
33
|
"LICENSE",
|
|
@@ -39,9 +39,9 @@
|
|
|
39
39
|
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
|
-
"@yozora/ast": "^2.1.
|
|
43
|
-
"@yozora/character": "^2.1.
|
|
44
|
-
"@yozora/core-tokenizer": "^2.1.
|
|
42
|
+
"@yozora/ast": "^2.1.4",
|
|
43
|
+
"@yozora/character": "^2.1.4",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.1.4"
|
|
45
45
|
},
|
|
46
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "aa464ed1e3cd84892773a833910cfc53a556bf5f"
|
|
47
47
|
}
|
package/src/index.ts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
export { match as emphasisMatch } from './match'
|
|
2
|
-
export { parse as emphasisParse } from './parse'
|
|
3
|
-
export { EmphasisTokenizer, EmphasisTokenizer as default } from './tokenizer'
|
|
4
|
-
export { uniqueName as EmphasisTokenizerName } from './types'
|
|
5
|
-
export type {
|
|
6
|
-
IThis as IEmphasisHookContext,
|
|
7
|
-
IToken as IEmphasisToken,
|
|
8
|
-
ITokenizerProps as IEmphasisTokenizerProps,
|
|
9
|
-
} from './types'
|
package/src/match.ts
DELETED
|
@@ -1,278 +0,0 @@
|
|
|
1
|
-
import { EmphasisType, StrongType } from '@yozora/ast'
|
|
2
|
-
import type { INodePoint } from '@yozora/character'
|
|
3
|
-
import {
|
|
4
|
-
AsciiCodePoint,
|
|
5
|
-
isPunctuationCharacter,
|
|
6
|
-
isUnicodeWhitespaceCharacter,
|
|
7
|
-
} from '@yozora/character'
|
|
8
|
-
import type {
|
|
9
|
-
IInlineToken,
|
|
10
|
-
IMatchInlineHookCreator,
|
|
11
|
-
IResultOfIsDelimiterPair,
|
|
12
|
-
IResultOfProcessDelimiterPair,
|
|
13
|
-
} from '@yozora/core-tokenizer'
|
|
14
|
-
import { eatOptionalCharacters, genFindDelimiter } from '@yozora/core-tokenizer'
|
|
15
|
-
import type { IDelimiter, IThis, IToken, T } from './types'
|
|
16
|
-
|
|
17
|
-
/**
|
|
18
|
-
* @see https://github.com/syntax-tree/mdast#strong
|
|
19
|
-
* @see https://github.github.com/gfm/#emphasis-and-strong-emphasis
|
|
20
|
-
*/
|
|
21
|
-
export const match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = function (api) {
|
|
22
|
-
return {
|
|
23
|
-
findDelimiter: () => genFindDelimiter<IDelimiter>(_findDelimiter),
|
|
24
|
-
isDelimiterPair,
|
|
25
|
-
processDelimiterPair,
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
function _findDelimiter(startIndex: number, endIndex: number): IDelimiter | null {
|
|
29
|
-
const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()
|
|
30
|
-
const blockStartIndex: number = api.getBlockStartIndex()
|
|
31
|
-
const blockEndIndex: number = api.getBlockEndIndex()
|
|
32
|
-
|
|
33
|
-
/**
|
|
34
|
-
* Check if it is an opener delimiter.
|
|
35
|
-
* @see https://github.github.com/gfm/#left-flanking-delimiter-run
|
|
36
|
-
*/
|
|
37
|
-
const isOpenerDelimiter = (delimiterStartIndex: number, delimiterEndIndex: number): boolean => {
|
|
38
|
-
if (delimiterEndIndex === blockEndIndex) return false
|
|
39
|
-
if (delimiterEndIndex === endIndex) return true
|
|
40
|
-
|
|
41
|
-
// Left-flanking delimiter should not be followed by Unicode whitespace
|
|
42
|
-
const nextCodePosition = nodePoints[delimiterEndIndex]
|
|
43
|
-
if (isUnicodeWhitespaceCharacter(nextCodePosition.codePoint)) return false
|
|
44
|
-
|
|
45
|
-
// Left-flanking delimiter should not be followed by a punctuation character
|
|
46
|
-
if (!isPunctuationCharacter(nextCodePosition.codePoint)) return true
|
|
47
|
-
|
|
48
|
-
// Or followed by a punctuation character and preceded
|
|
49
|
-
// by Unicode whitespace or a punctuation character
|
|
50
|
-
if (delimiterStartIndex <= startIndex) return true
|
|
51
|
-
const prevCodePosition = nodePoints[delimiterStartIndex - 1]
|
|
52
|
-
return (
|
|
53
|
-
isUnicodeWhitespaceCharacter(prevCodePosition.codePoint) ||
|
|
54
|
-
isPunctuationCharacter(prevCodePosition.codePoint)
|
|
55
|
-
)
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Check if it is a closer delimiter.
|
|
60
|
-
* @see https://github.github.com/gfm/#right-flanking-delimiter-run
|
|
61
|
-
*/
|
|
62
|
-
const isCloserDelimiter = (delimiterStartIndex: number, delimiterEndIndex: number): boolean => {
|
|
63
|
-
if (delimiterStartIndex === blockStartIndex) return false
|
|
64
|
-
if (delimiterStartIndex === startIndex) return true
|
|
65
|
-
|
|
66
|
-
// Right-flanking delimiter should not be preceded by Unicode whitespace.
|
|
67
|
-
const prevCodePosition = nodePoints[delimiterStartIndex - 1]
|
|
68
|
-
if (isUnicodeWhitespaceCharacter(prevCodePosition.codePoint)) return false
|
|
69
|
-
|
|
70
|
-
// Right-flanking delimiter should not be preceded by a punctuation character
|
|
71
|
-
if (!isPunctuationCharacter(prevCodePosition.codePoint)) return true
|
|
72
|
-
|
|
73
|
-
// Or preceded by a punctuation character and followed
|
|
74
|
-
// by Unicode whitespace or a punctuation character
|
|
75
|
-
if (delimiterEndIndex >= endIndex) return true
|
|
76
|
-
const nextCodePosition = nodePoints[delimiterEndIndex]
|
|
77
|
-
return (
|
|
78
|
-
isUnicodeWhitespaceCharacter(nextCodePosition.codePoint) ||
|
|
79
|
-
isPunctuationCharacter(nextCodePosition.codePoint)
|
|
80
|
-
)
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
for (let i = startIndex; i < endIndex; ++i) {
|
|
84
|
-
const c = nodePoints[i].codePoint
|
|
85
|
-
switch (c) {
|
|
86
|
-
case AsciiCodePoint.BACKSLASH:
|
|
87
|
-
i += 1
|
|
88
|
-
break
|
|
89
|
-
/**
|
|
90
|
-
* Rule #1: A single <i>*</i> character can open emphasis iff (if and
|
|
91
|
-
* only if) it is part of a left-flanking delimiter run.
|
|
92
|
-
* Rule #5: (..omit..)
|
|
93
|
-
* @see https://github.github.com/gfm/#example-360
|
|
94
|
-
*
|
|
95
|
-
* Rule #3: A single <i>*</i> character can close emphasis iff it is
|
|
96
|
-
* part of a right-flanking delimiter run.
|
|
97
|
-
* Rule #7: (..omit..)
|
|
98
|
-
* @see https://github.github.com/gfm/#example-366
|
|
99
|
-
*
|
|
100
|
-
* @see https://github.github.com/gfm/#can-open-emphasis
|
|
101
|
-
*/
|
|
102
|
-
case AsciiCodePoint.ASTERISK:
|
|
103
|
-
case AsciiCodePoint.UNDERSCORE: {
|
|
104
|
-
const _startIndex = i
|
|
105
|
-
|
|
106
|
-
// matched as many asterisk/underscore as possible
|
|
107
|
-
i = eatOptionalCharacters(nodePoints, i + 1, endIndex, c) - 1
|
|
108
|
-
|
|
109
|
-
const _endIndex = i + 1
|
|
110
|
-
const isLeftFlankingDelimiterRun = isOpenerDelimiter(_startIndex, _endIndex)
|
|
111
|
-
const isRightFlankingDelimiterRun = isCloserDelimiter(_startIndex, _endIndex)
|
|
112
|
-
|
|
113
|
-
let isOpener = isLeftFlankingDelimiterRun
|
|
114
|
-
let isCloser = isRightFlankingDelimiterRun
|
|
115
|
-
|
|
116
|
-
/**
|
|
117
|
-
* Rule #2: A single <i>_</i> character can open emphasis iff it is
|
|
118
|
-
* part of a left-flanking delimiter run and either:
|
|
119
|
-
* (a) not part of a right-flanking delimiter run, or
|
|
120
|
-
* (b) part of a right-flanking delimiter run preceded
|
|
121
|
-
* by punctuation.
|
|
122
|
-
* Rule #6: (..omit..)
|
|
123
|
-
* @see https://github.github.com/gfm/#example-367
|
|
124
|
-
* @see https://github.github.com/gfm/#example-368
|
|
125
|
-
* @see https://github.github.com/gfm/#example-369
|
|
126
|
-
* @see https://github.github.com/gfm/#example-370
|
|
127
|
-
* @see https://github.github.com/gfm/#example-373
|
|
128
|
-
*
|
|
129
|
-
* Rule #4: A single <i>_</i> character can close emphasis iff it is
|
|
130
|
-
* part of a right-flanking delimiter run and either:
|
|
131
|
-
* (a) not part of a left-flanking delimiter run, or
|
|
132
|
-
* (b) part of a left-flanking delimiter run followed
|
|
133
|
-
* by punctuation.
|
|
134
|
-
* Rule #8: (..omit..)
|
|
135
|
-
* @see https://github.github.com/gfm/#example-380
|
|
136
|
-
* @see https://github.github.com/gfm/#example-381
|
|
137
|
-
* @see https://github.github.com/gfm/#example-382
|
|
138
|
-
* @see https://github.github.com/gfm/#example-383
|
|
139
|
-
* @see https://github.github.com/gfm/#example-385
|
|
140
|
-
*/
|
|
141
|
-
if (c === AsciiCodePoint.UNDERSCORE) {
|
|
142
|
-
if (isLeftFlankingDelimiterRun && isRightFlankingDelimiterRun) {
|
|
143
|
-
// Rule #2
|
|
144
|
-
if (
|
|
145
|
-
_startIndex > startIndex &&
|
|
146
|
-
!isPunctuationCharacter(nodePoints[_startIndex - 1].codePoint)
|
|
147
|
-
) {
|
|
148
|
-
isOpener = false
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Rule #4
|
|
152
|
-
const nextCodePosition = nodePoints[_endIndex]
|
|
153
|
-
if (!isPunctuationCharacter(nextCodePosition.codePoint)) {
|
|
154
|
-
isCloser = false
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
if (!isOpener && !isCloser) break
|
|
160
|
-
const thickness = _endIndex - _startIndex
|
|
161
|
-
return {
|
|
162
|
-
type: isOpener ? (isCloser ? 'both' : 'opener') : 'closer',
|
|
163
|
-
startIndex: _startIndex,
|
|
164
|
-
endIndex: _endIndex,
|
|
165
|
-
thickness,
|
|
166
|
-
originalThickness: thickness,
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return null
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
function isDelimiterPair(
|
|
175
|
-
openerDelimiter: IDelimiter,
|
|
176
|
-
closerDelimiter: IDelimiter,
|
|
177
|
-
): IResultOfIsDelimiterPair {
|
|
178
|
-
const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()
|
|
179
|
-
|
|
180
|
-
/**
|
|
181
|
-
* Rule #9: INode begins with a delimiter that can open emphasis
|
|
182
|
-
* and ends with a delimiter that can close emphasis, and that
|
|
183
|
-
* uses the same character (_ or *) as the opening delimiter.
|
|
184
|
-
* The opening and closing delimiters must belong to separate
|
|
185
|
-
* delimiter runs.
|
|
186
|
-
* If one of the delimiters can both open and close emphasis,
|
|
187
|
-
* then the sum of the lengths of the delimiter runs containing
|
|
188
|
-
* the opening and closing delimiters must not be a multiple
|
|
189
|
-
* of 3 unless both lengths are multiples of 3.
|
|
190
|
-
* Rule #10: (..omit..)
|
|
191
|
-
* @see https://github.github.com/gfm/#example-413
|
|
192
|
-
* @see https://github.github.com/gfm/#example-42
|
|
193
|
-
*/
|
|
194
|
-
if (
|
|
195
|
-
nodePoints[openerDelimiter.startIndex].codePoint !==
|
|
196
|
-
nodePoints[closerDelimiter.startIndex].codePoint ||
|
|
197
|
-
((openerDelimiter.type === 'both' || closerDelimiter.type === 'both') &&
|
|
198
|
-
(openerDelimiter.originalThickness + closerDelimiter.originalThickness) % 3 === 0 &&
|
|
199
|
-
openerDelimiter.originalThickness % 3 !== 0)
|
|
200
|
-
) {
|
|
201
|
-
return { paired: false, opener: true, closer: true }
|
|
202
|
-
}
|
|
203
|
-
return { paired: true }
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
function processDelimiterPair(
|
|
207
|
-
openerDelimiter: IDelimiter,
|
|
208
|
-
closerDelimiter: IDelimiter,
|
|
209
|
-
internalTokens: ReadonlyArray<IInlineToken>,
|
|
210
|
-
): IResultOfProcessDelimiterPair<T, IToken, IDelimiter> {
|
|
211
|
-
/**
|
|
212
|
-
* Rule #13: The number of nestings should be minimized. Thus, for example,
|
|
213
|
-
* an interpretation '<strong>...</strong>' is always preferred
|
|
214
|
-
* to '<em><em>...</em></em>'.
|
|
215
|
-
* @see https://github.github.com/gfm/#example-469
|
|
216
|
-
* @see https://github.github.com/gfm/#example-470
|
|
217
|
-
* @see https://github.github.com/gfm/#example-473
|
|
218
|
-
* @see https://github.github.com/gfm/#example-475
|
|
219
|
-
*
|
|
220
|
-
* Rule #14: An interpretation '<em><strong>...</strong></em>' is always
|
|
221
|
-
* preferred to '<strong><em>...</em></strong>'
|
|
222
|
-
* @see https://github.github.com/gfm/#example-476
|
|
223
|
-
* @see https://github.github.com/gfm/#example-477
|
|
224
|
-
*
|
|
225
|
-
* Rule #16: When there are two potential emphasis or strong emphasis
|
|
226
|
-
* spans with the same closing delimiter, the shorter one (the
|
|
227
|
-
* one that opens later) takes precedence. Thus, for example,
|
|
228
|
-
* **foo **bar baz** is parsed as **foo <strong>bar baz</strong>
|
|
229
|
-
* rather than <strong>foo **bar baz</strong>.
|
|
230
|
-
* @see https://github.github.com/gfm/#example-480
|
|
231
|
-
* @see https://github.github.com/gfm/#example-481
|
|
232
|
-
*/
|
|
233
|
-
let thickness = 1
|
|
234
|
-
if (openerDelimiter.thickness > 1 && closerDelimiter.thickness > 1) {
|
|
235
|
-
thickness = 2
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
// eslint-disable-next-line no-param-reassign
|
|
239
|
-
internalTokens = api.resolveInternalTokens(
|
|
240
|
-
internalTokens,
|
|
241
|
-
openerDelimiter.endIndex,
|
|
242
|
-
closerDelimiter.startIndex,
|
|
243
|
-
)
|
|
244
|
-
|
|
245
|
-
const token: IToken = {
|
|
246
|
-
nodeType: thickness === 1 ? EmphasisType : StrongType,
|
|
247
|
-
startIndex: openerDelimiter.endIndex - thickness,
|
|
248
|
-
endIndex: closerDelimiter.startIndex + thickness,
|
|
249
|
-
thickness,
|
|
250
|
-
children: internalTokens,
|
|
251
|
-
}
|
|
252
|
-
const remainOpenerDelimiter: IDelimiter | undefined =
|
|
253
|
-
openerDelimiter.thickness > thickness
|
|
254
|
-
? {
|
|
255
|
-
type: openerDelimiter.type,
|
|
256
|
-
startIndex: openerDelimiter.startIndex,
|
|
257
|
-
endIndex: openerDelimiter.endIndex - thickness,
|
|
258
|
-
thickness: openerDelimiter.thickness - thickness,
|
|
259
|
-
originalThickness: openerDelimiter.originalThickness,
|
|
260
|
-
}
|
|
261
|
-
: undefined
|
|
262
|
-
const remainCloserDelimiter: IDelimiter | undefined =
|
|
263
|
-
closerDelimiter.thickness > thickness
|
|
264
|
-
? {
|
|
265
|
-
type: closerDelimiter.type,
|
|
266
|
-
startIndex: closerDelimiter.startIndex + thickness,
|
|
267
|
-
endIndex: closerDelimiter.endIndex,
|
|
268
|
-
thickness: closerDelimiter.thickness - thickness,
|
|
269
|
-
originalThickness: closerDelimiter.originalThickness,
|
|
270
|
-
}
|
|
271
|
-
: undefined
|
|
272
|
-
return {
|
|
273
|
-
tokens: [token],
|
|
274
|
-
remainOpenerDelimiter,
|
|
275
|
-
remainCloserDelimiter,
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
}
|
package/src/parse.ts
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type { Node } from '@yozora/ast'
|
|
2
|
-
import type { IParseInlineHookCreator } from '@yozora/core-tokenizer'
|
|
3
|
-
import type { INode, IThis, IToken, T } from './types'
|
|
4
|
-
|
|
5
|
-
export const parse: IParseInlineHookCreator<T, IToken, INode, IThis> = function (api) {
|
|
6
|
-
return {
|
|
7
|
-
parse: tokens =>
|
|
8
|
-
tokens.map(token => {
|
|
9
|
-
const children: Node[] = api.parseInlineTokens(token.children)
|
|
10
|
-
const node: INode = api.shouldReservePosition
|
|
11
|
-
? { type: token.nodeType, position: api.calcPosition(token), children }
|
|
12
|
-
: { type: token.nodeType, children }
|
|
13
|
-
return node
|
|
14
|
-
}),
|
|
15
|
-
}
|
|
16
|
-
}
|
package/src/tokenizer.ts
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
IInlineTokenizer,
|
|
3
|
-
IMatchInlineHookCreator,
|
|
4
|
-
IParseInlineHookCreator,
|
|
5
|
-
} from '@yozora/core-tokenizer'
|
|
6
|
-
import { BaseInlineTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
-
import { match } from './match'
|
|
8
|
-
import { parse } from './parse'
|
|
9
|
-
import type { IDelimiter, INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
-
import { uniqueName } from './types'
|
|
11
|
-
|
|
12
|
-
/**
|
|
13
|
-
* Lexical Analyzer for Emphasis and Strong Emphasis.
|
|
14
|
-
* @see https://github.com/syntax-tree/mdast#strong
|
|
15
|
-
* @see https://github.github.com/gfm/#emphasis-and-strong-emphasis
|
|
16
|
-
*/
|
|
17
|
-
export class EmphasisTokenizer
|
|
18
|
-
extends BaseInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
|
|
19
|
-
implements IInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
|
|
20
|
-
{
|
|
21
|
-
/* istanbul ignore next */
|
|
22
|
-
constructor(props: ITokenizerProps = {}) {
|
|
23
|
-
super({
|
|
24
|
-
name: props.name ?? uniqueName,
|
|
25
|
-
priority: props.priority ?? TokenizerPriority.CONTAINING_INLINE,
|
|
26
|
-
})
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
public override readonly match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = match
|
|
30
|
-
|
|
31
|
-
public override readonly parse: IParseInlineHookCreator<T, IToken, INode, IThis> = parse
|
|
32
|
-
}
|
package/src/types.ts
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import type { Emphasis, EmphasisType, Strong, StrongType } from '@yozora/ast'
|
|
2
|
-
import type {
|
|
3
|
-
IBaseInlineTokenizerProps,
|
|
4
|
-
IPartialInlineToken,
|
|
5
|
-
ITokenDelimiter,
|
|
6
|
-
ITokenizer,
|
|
7
|
-
} from '@yozora/core-tokenizer'
|
|
8
|
-
|
|
9
|
-
export type T = EmphasisType | StrongType
|
|
10
|
-
export type INode = Emphasis | Strong
|
|
11
|
-
export const uniqueName = '@yozora/tokenizer-emphasis'
|
|
12
|
-
|
|
13
|
-
export interface IToken extends IPartialInlineToken<T> {
|
|
14
|
-
/**
|
|
15
|
-
* IDelimiter thickness.
|
|
16
|
-
*/
|
|
17
|
-
thickness: number
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export interface IDelimiter extends ITokenDelimiter {
|
|
21
|
-
/**
|
|
22
|
-
* Thickness of the delimiter.
|
|
23
|
-
*/
|
|
24
|
-
thickness: number
|
|
25
|
-
/**
|
|
26
|
-
* The original thickness of the delimiter.
|
|
27
|
-
*/
|
|
28
|
-
originalThickness: number
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export type IThis = ITokenizer
|
|
32
|
-
|
|
33
|
-
export type ITokenizerProps = Partial<IBaseInlineTokenizerProps>
|