@yozora/tokenizer-definition 2.0.4 → 2.0.5-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -297,7 +297,6 @@ const match = function (api) {
297
297
  return { token, nextIndex: endIndex };
298
298
  }
299
299
  function eatContinuationText(line, token) {
300
- var _a;
301
300
  if (token.title != null && token.title.saturated)
302
301
  return { status: 'notMatched' };
303
302
  const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line;
@@ -357,7 +356,7 @@ const match = function (api) {
357
356
  };
358
357
  }
359
358
  token.lines.push(line);
360
- const saturated = (_a = token.title) === null || _a === void 0 ? void 0 : _a.saturated;
359
+ const saturated = token.title?.saturated;
361
360
  return { status: saturated ? 'closing' : 'opening', nextIndex: endIndex };
362
361
  }
363
362
  function onClose(token) {
@@ -415,19 +414,18 @@ const uniqueName = '@yozora/tokenizer-definition';
415
414
 
416
415
  class DefinitionTokenizer extends coreTokenizer.BaseBlockTokenizer {
417
416
  constructor(props = {}) {
418
- var _a, _b;
419
417
  super({
420
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
421
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : coreTokenizer.TokenizerPriority.ATOMIC,
418
+ name: props.name ?? uniqueName,
419
+ priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
422
420
  });
423
- this.match = match;
424
- this.parse = parse;
425
421
  }
422
+ match = match;
423
+ parse = parse;
426
424
  }
427
425
 
428
426
  exports.DefinitionTokenizer = DefinitionTokenizer;
429
427
  exports.DefinitionTokenizerName = uniqueName;
430
- exports["default"] = DefinitionTokenizer;
428
+ exports.default = DefinitionTokenizer;
431
429
  exports.definitionMatch = match;
432
430
  exports.definitionParse = parse;
433
431
  exports.eatAndCollectLinkDestination = eatAndCollectLinkDestination;
@@ -293,7 +293,6 @@ const match = function (api) {
293
293
  return { token, nextIndex: endIndex };
294
294
  }
295
295
  function eatContinuationText(line, token) {
296
- var _a;
297
296
  if (token.title != null && token.title.saturated)
298
297
  return { status: 'notMatched' };
299
298
  const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line;
@@ -353,7 +352,7 @@ const match = function (api) {
353
352
  };
354
353
  }
355
354
  token.lines.push(line);
356
- const saturated = (_a = token.title) === null || _a === void 0 ? void 0 : _a.saturated;
355
+ const saturated = token.title?.saturated;
357
356
  return { status: saturated ? 'closing' : 'opening', nextIndex: endIndex };
358
357
  }
359
358
  function onClose(token) {
@@ -411,14 +410,13 @@ const uniqueName = '@yozora/tokenizer-definition';
411
410
 
412
411
  class DefinitionTokenizer extends BaseBlockTokenizer {
413
412
  constructor(props = {}) {
414
- var _a, _b;
415
413
  super({
416
- name: (_a = props.name) !== null && _a !== void 0 ? _a : uniqueName,
417
- priority: (_b = props.priority) !== null && _b !== void 0 ? _b : TokenizerPriority.ATOMIC,
414
+ name: props.name ?? uniqueName,
415
+ priority: props.priority ?? TokenizerPriority.ATOMIC,
418
416
  });
419
- this.match = match;
420
- this.parse = parse;
421
417
  }
418
+ match = match;
419
+ parse = parse;
422
420
  }
423
421
 
424
422
  export { DefinitionTokenizer, uniqueName as DefinitionTokenizerName, DefinitionTokenizer as default, match as definitionMatch, parse as definitionParse, eatAndCollectLinkDestination, eatAndCollectLinkLabel, eatAndCollectLinkTitle };
@@ -116,8 +116,8 @@ declare function eatAndCollectLinkTitle(nodePoints: ReadonlyArray<INodePoint>, s
116
116
  state: ILinkTitleCollectingState;
117
117
  };
118
118
 
119
- declare type T = DefinitionType;
120
- declare type INode = Definition;
119
+ type T = DefinitionType;
120
+ type INode = Definition;
121
121
  declare const uniqueName = "@yozora/tokenizer-definition";
122
122
  interface IToken extends IPartialYastBlockToken<T> {
123
123
  /**
@@ -158,8 +158,8 @@ interface IToken extends IPartialYastBlockToken<T> {
158
158
  */
159
159
  _identifier?: string;
160
160
  }
161
- declare type IThis = ITokenizer;
162
- declare type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
161
+ type IThis = ITokenizer;
162
+ type ITokenizerProps = Partial<IBaseBlockTokenizerProps>;
163
163
 
164
164
  /**
165
165
  * A link reference definition consists of a link label, indented up to three
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yozora/tokenizer-definition",
3
- "version": "2.0.4",
3
+ "version": "2.0.5-alpha.0",
4
4
  "author": {
5
5
  "name": "guanghechen",
6
6
  "url": "https://github.com/guanghechen/"
@@ -11,33 +11,37 @@
11
11
  "directory": "tokenizers/definition"
12
12
  },
13
13
  "homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/definition",
14
- "main": "lib/cjs/index.js",
15
- "module": "lib/esm/index.js",
16
- "types": "lib/types/index.d.ts",
17
- "source": "src/index.ts",
14
+ "type": "module",
15
+ "exports": {
16
+ "types": "./lib/types/index.d.ts",
17
+ "import": "./lib/esm/index.mjs",
18
+ "require": "./lib/cjs/index.cjs"
19
+ },
20
+ "source": "./src/index.ts",
21
+ "types": "./lib/types/index.d.ts",
22
+ "main": "./lib/cjs/index.cjs",
23
+ "module": "./lib/esm/index.mjs",
18
24
  "license": "MIT",
19
25
  "engines": {
20
26
  "node": ">= 16.0.0"
21
27
  },
22
28
  "files": [
23
29
  "lib/",
24
- "!lib/**/*.js.map",
25
- "!lib/**/*.d.ts.map",
30
+ "src/",
26
31
  "package.json",
27
32
  "CHANGELOG.md",
28
33
  "LICENSE",
29
34
  "README.md"
30
35
  ],
31
36
  "scripts": {
32
- "build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.js",
33
- "prebuild": "rimraf lib/",
37
+ "build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
34
38
  "prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
35
- "test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.js --rootDir ."
39
+ "test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
36
40
  },
37
41
  "dependencies": {
38
- "@yozora/ast": "^2.0.4",
39
- "@yozora/character": "^2.0.4",
40
- "@yozora/core-tokenizer": "^2.0.4"
42
+ "@yozora/ast": "^2.0.5-alpha.0",
43
+ "@yozora/character": "^2.0.5-alpha.0",
44
+ "@yozora/core-tokenizer": "^2.0.5-alpha.0"
41
45
  },
42
- "gitHead": "c980b95254394dcacba0cbb4bea251350b09397c"
46
+ "gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
43
47
  }
package/src/index.ts ADDED
@@ -0,0 +1,12 @@
1
+ export * from './util/link-destination'
2
+ export * from './util/link-label'
3
+ export * from './util/link-title'
4
+ export { match as definitionMatch } from './match'
5
+ export { parse as definitionParse } from './parse'
6
+ export { DefinitionTokenizer, DefinitionTokenizer as default } from './tokenizer'
7
+ export { uniqueName as DefinitionTokenizerName } from './types'
8
+ export type {
9
+ IThis as IDefinitionHookContext,
10
+ IToken as IDefinitionToken,
11
+ ITokenizerProps as IDefinitionTokenizerProps,
12
+ } from './types'
package/src/match.ts ADDED
@@ -0,0 +1,340 @@
1
+ import { DefinitionType } from '@yozora/ast'
2
+ import type { INodePoint } from '@yozora/character'
3
+ import { AsciiCodePoint, calcStringFromNodePoints } from '@yozora/character'
4
+ import type {
5
+ IMatchBlockHookCreator,
6
+ IPhrasingContentLine,
7
+ IResultOfEatContinuationText,
8
+ IResultOfEatOpener,
9
+ IResultOfOnClose,
10
+ } from '@yozora/core-tokenizer'
11
+ import {
12
+ calcEndPoint,
13
+ calcStartPoint,
14
+ eatOptionalWhitespaces,
15
+ resolveLabelToIdentifier,
16
+ } from '@yozora/core-tokenizer'
17
+ import type { IThis, IToken, T } from './types'
18
+ import { eatAndCollectLinkDestination } from './util/link-destination'
19
+ import { eatAndCollectLinkLabel } from './util/link-label'
20
+ import { eatAndCollectLinkTitle } from './util/link-title'
21
+
22
+ /**
23
+ * A link reference definition consists of a link label, indented up to three
24
+ * spaces, followed by a colon (:), optional whitespace (including up to one
25
+ * line ending), a link destination, optional whitespace (including up to one
26
+ * line ending), and an optional link title, which if it is present must be
27
+ * separated from the link destination by whitespace. No further non-whitespace
28
+ * characters may occur on the line.
29
+ *
30
+ * A link reference definition does not correspond to a structural element of
31
+ * a document. Instead, it defines a label which can be used in reference
32
+ * links and reference-style images elsewhere in the document. Link reference
33
+ * definitions can come either before or after the links that use them.
34
+ *
35
+ * @see https://github.github.com/gfm/#link-reference-definition
36
+ */
37
+ export const match: IMatchBlockHookCreator<T, IToken, IThis> = function (api) {
38
+ return {
39
+ isContainingBlock: false,
40
+ eatOpener,
41
+ eatContinuationText,
42
+ onClose,
43
+ }
44
+
45
/**
 * Try to open a link reference definition on `line`.
 *
 * The line is scanned for a label, then `:`, then a destination and finally
 * an optional title. Scanning may stop early at the end of the line, in which
 * case the token is opened with only the parts seen so far and the remaining
 * parts are left to `eatContinuationText`.
 *
 * @param line the candidate first line of the definition
 * @returns the opened token (consuming the whole line), or null when the
 *          line cannot start a definition
 */
function eatOpener(line: Readonly<IPhrasingContentLine>): IResultOfEatOpener<T, IToken> {
  // Four leading spaces are too much.
  // @see https://github.github.com/gfm/#example-180
  if (line.countOfPrecedeSpaces >= 4) return null

  const { nodePoints, startIndex, endIndex, firstNonWhitespaceIndex } = line
  if (firstNonWhitespaceIndex >= endIndex) return null

  // Link label.
  const labelResult = eatAndCollectLinkLabel(nodePoints, firstNonWhitespaceIndex, endIndex, null)
  const labelEnd = labelResult.nextIndex
  if (labelEnd < 0) return null

  const currentLineNo = nodePoints[startIndex].line

  // Builds the token lazily, only once we know the line is accepted. A part
  // that has not been seen is null and its line number stays at -1.
  const open = (
    destination: IToken['destination'],
    title: IToken['title'],
  ): IResultOfEatOpener<T, IToken> => {
    const token: IToken = {
      nodeType: DefinitionType,
      position: {
        start: calcStartPoint(nodePoints, startIndex),
        end: calcEndPoint(nodePoints, endIndex - 1),
      },
      label: labelResult.state,
      destination,
      title,
      lineNoOfLabel: currentLineNo,
      lineNoOfDestination: destination == null ? -1 : currentLineNo,
      lineNoOfTitle: title == null ? -1 : currentLineNo,
      lines: [line],
    }
    return { token, nextIndex: endIndex }
  }

  // The label continues on a following line.
  if (!labelResult.state.saturated) return open(null, null)

  // A finished label must be followed by a colon and something after it.
  const colonMissing = nodePoints[labelEnd].codePoint !== AsciiCodePoint.COLON
  if (labelEnd + 1 >= endIndex || colonMissing) return null

  /**
   * At most one line break can be used between link label and link destination
   * @see https://github.github.com/gfm/#example-162
   * @see https://github.github.com/gfm/#example-164
   * @see https://github.github.com/gfm/#link-reference-definition
   */
  const destinationStart = eatOptionalWhitespaces(nodePoints, labelEnd + 1, endIndex)
  if (destinationStart >= endIndex) return open(null, null)

  // Link destination.
  const destinationResult = eatAndCollectLinkDestination(
    nodePoints,
    destinationStart,
    endIndex,
    null,
  )
  const destinationEnd = destinationResult.nextIndex

  /**
   * The link destination may not be omitted
   * @see https://github.github.com/gfm/#example-168
   */
  if (destinationEnd < 0) return null

  // An unfinished destination is only tolerated when it ran to the end of the line.
  if (!destinationResult.state.saturated && destinationEnd !== endIndex) return null

  /**
   * At most one line break can be used between link destination and link title
   * @see https://github.github.com/gfm/#example-162
   * @see https://github.github.com/gfm/#example-164
   * @see https://github.github.com/gfm/#link-reference-definition
   */
  const titleStart = eatOptionalWhitespaces(nodePoints, destinationEnd, endIndex)
  if (titleStart >= endIndex) return open(destinationResult.state, null)

  /**
   * The title must be separated from the link destination by whitespace.
   * @see https://github.github.com/gfm/#example-170
   */
  if (titleStart === destinationEnd) return null

  // Link title.
  const titleResult = eatAndCollectLinkTitle(nodePoints, titleStart, endIndex, null)

  /**
   * Non-whitespace characters after the title are not allowed
   * @see https://github.github.com/gfm/#example-178
   */
  const tailStart = titleResult.nextIndex >= 0 ? titleResult.nextIndex : titleStart
  if (tailStart < endIndex && eatOptionalWhitespaces(nodePoints, tailStart, endIndex) < endIndex) {
    return null
  }

  return open(destinationResult.state, titleResult.state)
}
170
+
171
/**
 * Try to extend an open definition token with the next line.
 *
 * Scanning resumes at whichever part is still pending: an unfinished label,
 * a missing destination, and then the optional title. The token is updated
 * in place.
 *
 * @param line  the candidate continuation line
 * @param token the definition token opened by `eatOpener`
 * @returns `notMatched` when the definition is already complete,
 *          `opening`/`closing` when the line was consumed,
 *          `failedAndRollback` when the collected lines do not form a
 *          definition, or `closingAndRollback` when the definition is valid
 *          without the attempted title
 */
function eatContinuationText(
  line: Readonly<IPhrasingContentLine>,
  token: IToken,
): IResultOfEatContinuationText {
  // All parts of the definition have already been matched.
  if (token.title != null && token.title.saturated) return { status: 'notMatched' }

  const { nodePoints, startIndex, firstNonWhitespaceIndex, endIndex } = line
  const lineNo = nodePoints[startIndex].line

  let i = firstNonWhitespaceIndex
  if (!token.label.saturated) {
    const { nextIndex: labelEndIndex, state: labelState } = eatAndCollectLinkLabel(
      nodePoints,
      i,
      endIndex,
      token.label,
    )
    if (labelEndIndex < 0) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // The label still continues on a following line.
    if (!labelState.saturated) {
      token.lines.push(line)
      return { status: 'opening', nextIndex: endIndex }
    }

    // Saturated but no following colon exists.
    if (
      labelEndIndex + 1 >= endIndex ||
      nodePoints[labelEndIndex].codePoint !== AsciiCodePoint.COLON
    ) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    i = labelEndIndex + 1
  }

  if (token.destination == null) {
    i = eatOptionalWhitespaces(nodePoints, i, endIndex)
    if (i >= endIndex) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // Try to match link destination
    const { nextIndex: destinationEndIndex, state: destinationState } =
      eatAndCollectLinkDestination(nodePoints, i, endIndex, null)

    /**
     * At most one line break can be used between link destination and link label,
     * therefore, this line must match a complete link destination
     */
    if (destinationEndIndex < 0 || !destinationState.saturated) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // Record the destination as soon as it is known. Previously it was only
    // stored when nothing followed it on the line, so a destination and title
    // sharing a continuation line left `token.destination` null and the
    // parse step dereferenced it.
    // eslint-disable-next-line no-param-reassign
    token.destination = destinationState
    // eslint-disable-next-line no-param-reassign
    token.lineNoOfDestination = lineNo

    /**
     * At most one line break can be used between link title and link destination
     * @see https://github.github.com/gfm/#example-162
     * @see https://github.github.com/gfm/#example-164
     * @see https://github.github.com/gfm/#link-reference-definition
     */
    i = eatOptionalWhitespaces(nodePoints, destinationEndIndex, endIndex)
    if (i >= endIndex) {
      token.lines.push(line)
      return { status: 'opening', nextIndex: endIndex }
    }

    /**
     * The title must be separated from the link destination by whitespace
     * (same rule as applied in eatOpener).
     * @see https://github.github.com/gfm/#example-170
     */
    if (i === destinationEndIndex) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // The title starts on the same line as the destination.
    // eslint-disable-next-line no-param-reassign
    token.lineNoOfTitle = lineNo
  }

  if (token.lineNoOfTitle < 0) {
    // eslint-disable-next-line no-param-reassign
    token.lineNoOfTitle = lineNo
  }

  const { nextIndex: titleEndIndex, state: titleState } = eatAndCollectLinkTitle(
    nodePoints,
    i,
    endIndex,
    token.title,
  )
  // eslint-disable-next-line no-param-reassign
  token.title = titleState

  const titleRejected =
    titleEndIndex < 0 ||
    titleState.nodePoints.length <= 0 ||
    // Non-whitespace characters after a finished title are not allowed.
    (titleState.saturated &&
      eatOptionalWhitespaces(nodePoints, titleEndIndex, endIndex) < endIndex)

  if (titleRejected) {
    // Destination and title began on the same line: without a valid title
    // that line is not a definition line at all.
    if (token.lineNoOfDestination === token.lineNoOfTitle) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // Otherwise the definition is complete without a title; drop the title
    // and hand the title lines back.
    // NOTE(review): `lineNoOfTitle - 1` is used as an index into token.lines,
    // which lines up only when the first collected line is line 1 — confirm
    // against how `nodePoints[].line` is numbered.
    const lastLine = token.lines[token.lines.length - 1]
    // eslint-disable-next-line no-param-reassign
    token.title = null
    // eslint-disable-next-line no-param-reassign
    token.position.end = calcEndPoint(lastLine.nodePoints, lastLine.endIndex - 1)
    return {
      status: 'closingAndRollback',
      lines: token.lines.slice(token.lineNoOfTitle - 1),
    }
  }

  token.lines.push(line)
  return { status: titleState.saturated ? 'closing' : 'opening', nextIndex: endIndex }
}
287
+
288
/**
 * Finalize a definition token when its block is closed.
 *
 * A token without a saturated title is validated first: it needs a saturated
 * label and a saturated destination, and a partially collected title is
 * discarded (its lines are rolled back). For every token that survives, the
 * label text and identifier are computed, the identifier is registered through
 * `api`, and both are cached on the token.
 *
 * @param token the token being closed; updated in place
 * @returns a rollback instruction, or nothing when the token is kept as is
 */
function onClose(token: IToken): IResultOfOnClose {
  let result: IResultOfOnClose

  const pendingTitle = token.title
  const titleComplete = pendingTitle != null && pendingTitle.saturated

  if (!titleComplete) {
    // Without a valid label and a valid destination there is no definition.
    const destinationReady = token.destination != null && token.destination.saturated
    if (!token.label.saturated || !destinationReady) {
      return { status: 'failedAndRollback', lines: token.lines }
    }

    // A title was started but never finished.
    if (pendingTitle != null) {
      if (token.lineNoOfDestination === token.lineNoOfTitle) {
        return { status: 'failedAndRollback', lines: token.lines }
      }

      // Cut the title lines off the token and give them back.
      const rolledBack = token.lines.splice(token.lineNoOfTitle - 1)
      const tail = token.lines[token.lines.length - 1]
      // eslint-disable-next-line no-param-reassign
      token.title = null
      // eslint-disable-next-line no-param-reassign
      token.position.end = calcEndPoint(tail.nodePoints, tail.endIndex - 1)

      result = { status: 'closingAndRollback', lines: rolledBack }
    }
  }

  /**
   * Labels are trimmed and case-insensitive
   * @see https://github.github.com/gfm/#example-174
   * @see https://github.github.com/gfm/#example-175
   */
  const labelPoints: INodePoint[] = token.label.nodePoints
  // Skip the first and last collected points (the enclosing brackets).
  const label = calcStringFromNodePoints(labelPoints, 1, labelPoints.length - 1)
  const identifier = resolveLabelToIdentifier(label)

  // Make the identifier known to the rest of the pipeline.
  api.registerDefinitionIdentifier(identifier)

  // Cache both values on the token so the parse step need not recompute them.
  // eslint-disable-next-line no-param-reassign
  token._label = label
  // eslint-disable-next-line no-param-reassign
  token._identifier = identifier
  return result
}
340
+ }
package/src/parse.ts ADDED
@@ -0,0 +1,50 @@
1
+ import { DefinitionType } from '@yozora/ast'
2
+ import type { INodePoint } from '@yozora/character'
3
+ import { AsciiCodePoint, calcEscapedStringFromNodePoints } from '@yozora/character'
4
+ import type { IParseBlockHookCreator } from '@yozora/core-tokenizer'
5
+ import { encodeLinkDestination } from '@yozora/core-tokenizer'
6
+ import type { INode, IThis, IToken, T } from './types'
7
+
8
/**
 * Parse hook creator for definitions: turns each matched token into a
 * `Definition` node, using the label and identifier cached on the token.
 */
export const parse: IParseBlockHookCreator<T, IToken, INode, IThis> = function (api) {
  /**
   * Resolve link destination
   * @see https://github.github.com/gfm/#link-destination
   */
  const resolveUrl = (points: INodePoint[]): string => {
    // A destination starting with '<' is unwrapped: its first and last points are skipped.
    const wrapped = points[0].codePoint === AsciiCodePoint.OPEN_ANGLE
    const from = wrapped ? 1 : 0
    const to = wrapped ? points.length - 1 : points.length
    const destination: string = calcEscapedStringFromNodePoints(points, from, to, true)
    return encodeLinkDestination(destination)
  }

  /**
   * Resolve link title
   * @see https://github.github.com/gfm/#link-title
   */
  const resolveTitle = (titleState: IToken['title']): string | undefined => {
    if (titleState == null) return undefined
    // Skip the first and last collected points (the wrapping characters).
    const points = titleState.nodePoints
    return calcEscapedStringFromNodePoints(points, 1, points.length - 1)
  }

  return {
    parse: tokens =>
      tokens.map(token => {
        const label: string = token._label!
        const identifier: string = token._identifier!
        const url = resolveUrl(token.destination!.nodePoints)
        const title = resolveTitle(token.title)

        // The position is only attached when the caller asked to keep it.
        if (api.shouldReservePosition) {
          const positioned: INode = {
            type: DefinitionType,
            position: token.position,
            identifier,
            label,
            url,
            title,
          }
          return positioned
        }

        const bare: INode = { type: DefinitionType, identifier, label, url, title }
        return bare
      }),
  }
}
@@ -0,0 +1,31 @@
1
+ import type {
2
+ IBlockTokenizer,
3
+ IMatchBlockHookCreator,
4
+ IParseBlockHookCreator,
5
+ } from '@yozora/core-tokenizer'
6
+ import { BaseBlockTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
7
+ import { match } from './match'
8
+ import { parse } from './parse'
9
+ import { uniqueName } from './types'
10
+ import type { INode, IThis, IToken, ITokenizerProps, T } from './types'
11
+
12
/**
 * Block tokenizer for link reference definitions.
 *
 * Wires the `match` and `parse` hook creators into a `BaseBlockTokenizer`.
 * The name defaults to `uniqueName` and the priority to
 * `TokenizerPriority.ATOMIC` when not supplied through `props`.
 *
 * @see https://github.github.com/gfm/#link-reference-definition
 */
export class DefinitionTokenizer
  extends BaseBlockTokenizer<T, IToken, INode, IThis>
  implements IBlockTokenizer<T, IToken, INode, IThis>
{
  /** Hook creator used during the match phase. */
  public override readonly match: IMatchBlockHookCreator<T, IToken, IThis> = match

  /** Hook creator used during the parse phase. */
  public override readonly parse: IParseBlockHookCreator<T, IToken, INode, IThis> = parse

  /* istanbul ignore next */
  constructor(props: ITokenizerProps = {}) {
    super({
      name: props.name ?? uniqueName,
      priority: props.priority ?? TokenizerPriority.ATOMIC,
    })
  }
}
package/src/types.ts ADDED
@@ -0,0 +1,58 @@
1
+ import type { Definition, DefinitionType } from '@yozora/ast'
2
+ import type {
3
+ IBaseBlockTokenizerProps,
4
+ IPartialYastBlockToken,
5
+ IPhrasingContentLine,
6
+ ITokenizer,
7
+ } from '@yozora/core-tokenizer'
8
+ import type { ILinkDestinationCollectingState } from './util/link-destination'
9
+ import type { ILinkLabelCollectingState } from './util/link-label'
10
+ import type { ILinkTitleCollectingState } from './util/link-title'
11
+
12
+ export type T = DefinitionType
13
+ export type INode = Definition
14
+ export const uniqueName = '@yozora/tokenizer-definition'
15
+
16
+ export interface IToken extends IPartialYastBlockToken<T> {
17
+ /**
18
+ * Source lines collected so far for this definition, in the order they were consumed.
19
+ */
20
+ lines: Array<Readonly<IPhrasingContentLine>>
21
+ /**
22
+ * Collecting state of the link label (its node points include the brackets).
23
+ * Labels are matched trimmed and case-insensitively.
24
+ */
25
+ label: ILinkLabelCollectingState
26
+ /**
27
+ * Collecting state of the link destination; null while no destination has been matched.
28
+ */
29
+ destination: ILinkDestinationCollectingState | null
30
+ /**
31
+ * Collecting state of the link title; null while no title has been matched or after a failed title was discarded.
32
+ */
33
+ title: ILinkTitleCollectingState | null
34
+ /**
35
+ * The line number of the first matched character of the link label.
36
+ */
37
+ lineNoOfLabel: number
38
+ /**
39
+ * The line number of the first matched character of the link destination (initialized to -1).
40
+ */
41
+ lineNoOfDestination: number
42
+ /**
43
+ * The line number of the first matched character of the link title (initialized to -1).
44
+ */
45
+ lineNoOfTitle: number
46
+ /**
47
+ * Label text between the brackets, cached when the token is closed.
48
+ */
49
+ _label?: string
50
+ /**
51
+ * Identifier resolved from the label, cached when the token is closed.
52
+ */
53
+ _identifier?: string
54
+ }
55
+
56
+ export type IThis = ITokenizer
57
+
58
+ export type ITokenizerProps = Partial<IBaseBlockTokenizerProps>
@@ -0,0 +1,160 @@
1
+ import type { INodePoint } from '@yozora/character'
2
+ import {
3
+ AsciiCodePoint,
4
+ VirtualCodePoint,
5
+ isAsciiControlCharacter,
6
+ isWhitespaceCharacter,
7
+ } from '@yozora/character'
8
+ import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
9
+
10
+ /**
11
+ * The processing token of eatAndCollectLinkDestination, used to save
12
+ * intermediate data to support multiple codePosition fragment processing
13
+ *
14
+ * @see https://github.github.com/gfm/#link-destination
15
+ */
16
+ export interface ILinkDestinationCollectingState {
17
+ /**
18
+ * Whether a complete link destination has been collected.
19
+ */
20
+ saturated: boolean
21
+ /**
22
+ * Node points collected so far, including the enclosing '<' and '>' when present.
23
+ */
24
+ nodePoints: INodePoint[]
25
+ /**
26
+ * Whether the destination started with an opening angle bracket '<'.
27
+ */
28
+ hasOpenAngleBracket: boolean
29
+ /**
30
+ * Running balance of unescaped parentheses: +1 for each '(' and -1 for each ')'.
31
+ */
32
+ openParensCount: number
33
+ }
34
+
35
/**
 * Scan a link destination from `nodePoints[startIndex, endIndex)`.
 *
 * Two forms are recognized: a destination enclosed in `<` and `>`, and a bare
 * destination that ends at the first whitespace or ASCII control character
 * (or at the end of the range).
 *
 * @param nodePoints node points of the text being scanned
 * @param startIndex index at which scanning starts
 * @param endIndex   end of the scanned range (exclusive)
 * @param state      state of a previous call to continue from, or null to
 *                   start a fresh destination
 * @returns `nextIndex` is -1 when no destination can be matched, otherwise the
 *          index at which scanning stopped; `state` is the (updated) state
 * @see https://github.github.com/gfm/#link-destination
 */
export function eatAndCollectLinkDestination(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkDestinationCollectingState | null,
): { nextIndex: number; state: ILinkDestinationCollectingState } {
  // Continue from the given state, or start a new one.
  const current: ILinkDestinationCollectingState = state ?? {
    saturated: false,
    nodePoints: [],
    hasOpenAngleBracket: false,
    openParensCount: 0,
  }
  const collected = current.nodePoints
  const stopAt = (nextIndex: number) => ({ nextIndex, state: current })

  /**
   * Although link destination may span multiple lines,
   * they may not contain a blank line.
   */
  const contentStart = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
  if (contentStart >= endIndex) return stopAt(-1)

  let cursor = startIndex
  if (collected.length <= 0) {
    // Nothing collected yet: skip leading whitespace and look for '<'.
    cursor = contentStart
    const head = nodePoints[cursor]
    if (head.codePoint === AsciiCodePoint.OPEN_ANGLE) {
      current.hasOpenAngleBracket = true
      collected.push(head)
      cursor += 1
    }
  }

  /**
   * In pointy brackets:
   *  - A sequence of zero or more characters between an opening '<' and
   *    a closing '>' that contains no line breaks or unescaped '<' or '>' characters
   */
  if (current.hasOpenAngleBracket) {
    while (cursor < endIndex) {
      const point = nodePoints[cursor]
      const code = point.codePoint

      if (code === AsciiCodePoint.BACKSLASH) {
        // Keep the backslash together with the character it escapes.
        if (cursor + 1 < endIndex) collected.push(point, nodePoints[cursor + 1])
        cursor += 2
        continue
      }

      if (code === AsciiCodePoint.OPEN_ANGLE || code === VirtualCodePoint.LINE_END) {
        return stopAt(-1)
      }

      collected.push(point)
      if (code === AsciiCodePoint.CLOSE_ANGLE) {
        current.saturated = true
        return stopAt(cursor + 1)
      }
      cursor += 1
    }
    return stopAt(cursor)
  }

  /**
   * Not in pointy brackets:
   *  - A nonempty sequence of characters that does not start with '<', does not include
   *    ASCII space or control characters, and includes parentheses only if
   *
   *    a) they are backslash-escaped or
   *    b) they are part of a balanced pair of unescaped parentheses. (Implementations
   *       may impose limits on parentheses nesting to avoid performance issues,
   *       but at least three levels of nesting should be supported.)
   */
  while (cursor < endIndex) {
    const point = nodePoints[cursor]
    const code = point.codePoint

    if (code === AsciiCodePoint.BACKSLASH) {
      if (cursor + 1 < endIndex) collected.push(point, nodePoints[cursor + 1])
      cursor += 2
      continue
    }

    if (code === AsciiCodePoint.OPEN_PARENTHESIS) {
      current.openParensCount += 1
      collected.push(point)
    } else if (code === AsciiCodePoint.CLOSE_PARENTHESIS) {
      current.openParensCount -= 1
      collected.push(point)
      // An unmatched ')' stops the scan without saturating the destination.
      if (current.openParensCount < 0) return stopAt(cursor)
    } else if (isWhitespaceCharacter(code) || isAsciiControlCharacter(code)) {
      // Whitespace or a control character terminates a bare destination.
      current.saturated = true
      return stopAt(cursor)
    } else {
      collected.push(point)
    }
    cursor += 1
  }

  // Reaching the end of the range also terminates a bare destination.
  current.saturated = true
  return stopAt(cursor)
}
@@ -0,0 +1,116 @@
1
+ import type { INodePoint } from '@yozora/character'
2
+ import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
3
+ import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
4
+
5
+ /**
6
+ * The processing token of eatAndCollectLinkLabel, used to save
7
+ * intermediate data to support multiple codePosition fragment processing
8
+ *
9
+ * @see https://github.github.com/gfm/#link-label
10
+ */
11
+ export interface ILinkLabelCollectingState {
12
+ /**
13
+ * Whether a complete link label has been collected.
14
+ */
15
+ saturated: boolean
16
+ /**
17
+ * Node points collected so far, including the enclosing brackets.
18
+ */
19
+ nodePoints: INodePoint[]
20
+ /**
21
+ * Whether a non-whitespace character (or a backslash) has been seen inside the label.
22
+ */
23
+ hasNonWhitespaceCharacter: boolean
24
+ }
25
+
26
/**
 * A link label begins with a left bracket '[' and ends with the first right bracket ']'
 * that is not backslash-escaped. Between these brackets there must be at least one
 * non-whitespace character. Unescaped square bracket characters are not allowed inside
 * the opening and closing square brackets of link labels. A link label can have at most
 * 999 characters inside the square brackets.
 *
 * One label matches another just in case their normalized forms are equal. To normalize
 * a label, strip off the opening and closing brackets, perform the Unicode case fold,
 * strip leading and trailing whitespace and collapse consecutive internal whitespace to
 * a single space. If there are multiple matching reference link definitions, the one that
 * comes first in the document is used. (It is desirable in such cases to emit a warning.)
 *
 * @param nodePoints node points of the text being scanned
 * @param startIndex index at which scanning starts
 * @param endIndex   end of the scanned range (exclusive)
 * @param state      state of a previous call to continue from, or null to
 *                   start a fresh label
 * @returns `nextIndex` is -1 when no label can be matched, the index right
 *          after the closing ']' when the label is complete, and `endIndex`
 *          when the range ran out before the label was closed (check
 *          `state.saturated` to tell the last two apart); `state` is the
 *          (updated) state
 * @see https://github.github.com/gfm/#link-label
 */
export function eatAndCollectLinkLabel(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkLabelCollectingState | null,
): { nextIndex: number; state: ILinkLabelCollectingState } {
  // Continue from the given state, or start a new one.
  const current: ILinkLabelCollectingState = state ?? {
    saturated: false,
    nodePoints: [],
    hasNonWhitespaceCharacter: false,
  }
  const failed = { nextIndex: -1, state: current }

  /**
   * Although link label may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, startIndex, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return failed

  let i = startIndex
  if (current.nodePoints.length <= 0) {
    // A fresh label must start with '['.
    i = firstNonWhitespaceIndex
    const opener = nodePoints[i]
    if (opener.codePoint !== AsciiCodePoint.OPEN_BRACKET) return failed

    i += 1
    current.nodePoints.push(opener)
  }

  for (; i < endIndex; ++i) {
    const p = nodePoints[i]
    switch (p.codePoint) {
      case AsciiCodePoint.BACKSLASH:
        current.hasNonWhitespaceCharacter = true
        // Keep the backslash together with the character it escapes.
        if (i + 1 < endIndex) {
          current.nodePoints.push(p)
          current.nodePoints.push(nodePoints[i + 1])
        }
        i += 1
        break
      case AsciiCodePoint.OPEN_BRACKET:
        // Unescaped '[' is not allowed inside a label.
        return failed
      case AsciiCodePoint.CLOSE_BRACKET:
        current.nodePoints.push(p)
        // A label containing only whitespace is not a label.
        if (!current.hasNonWhitespaceCharacter) return failed
        current.saturated = true
        return { nextIndex: i + 1, state: current }
      default:
        if (!isWhitespaceCharacter(p.codePoint)) {
          current.hasNonWhitespaceCharacter = true
        }
        current.nodePoints.push(p)
    }
  }

  // The range ran out before ']' was found. Report the position reached
  // instead of a hard-coded 1; `endIndex` is always positive here, so callers
  // that only test for a negative value are unaffected.
  return { nextIndex: endIndex, state: current }
}
@@ -0,0 +1,143 @@
1
+ import type { INodePoint } from '@yozora/character'
2
+ import { AsciiCodePoint, VirtualCodePoint } from '@yozora/character'
3
+ import { eatOptionalWhitespaces } from '@yozora/core-tokenizer'
4
+
5
+ /**
6
+ * The processing state of eatAndCollectLinkTitle, used to save intermediate
7
+ * data so that a link title can be collected across several fragments (calls).
8
+ *
9
+ * @see https://github.github.com/gfm/#link-title
10
+ */
11
+ export interface ILinkTitleCollectingState {
12
+ /**
13
+ * Whether a complete link title (up to its closing delimiter) has been collected
14
+ */
15
+ saturated: boolean
16
+ /**
17
+ * Node points collected so far, starting with the opening delimiter
18
+ */
19
+ nodePoints: INodePoint[]
20
+ /**
21
+ * Code point of the character that opened the link-title; null until one is found
22
+ */
23
+ wrapSymbol: number | null
24
+ }
25
+
26
/**
 * Consume (a fragment of) a link title from `nodePoints[startIndex, endIndex)`
 * and append the consumed node points to `state.nodePoints`.
 *
 * A title may span several fragments: pass `null` as `state` for the first
 * fragment and the returned `state` for each following one.
 *
 * @param nodePoints  node points to read from
 * @param startIndex  index of the first node point to examine (inclusive)
 * @param endIndex    index at which to stop (exclusive)
 * @param state       state returned by a previous call, or `null` to start a
 *                    new title; a non-null state is mutated in place
 * @returns the (possibly newly created) `state` and `nextIndex`, which is
 *  - `-1` if the fragment is blank, does not start with `"`, `'` or `(` when
 *    nothing has been collected yet, contains an unescaped `(` inside a
 *    parenthesized title, or has a `)` that is not at the end of the line;
 *  - the index just past the closing delimiter when the title has been
 *    completed (`state.saturated` is then `true`);
 *  - `endIndex` when the fragment ended before the title was closed.
 * @see https://github.github.com/gfm/#link-title
 */
export function eatAndCollectLinkTitle(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
  state: ILinkTitleCollectingState | null,
): { nextIndex: number; state: ILinkTitleCollectingState } {
  let i = startIndex

  // First fragment: start from an empty state.
  if (state == null) {
    // eslint-disable-next-line no-param-reassign
    state = {
      saturated: false,
      nodePoints: [],
      wrapSymbol: null,
    }
  }

  /**
   * Although link titles may span multiple lines,
   * they may not contain a blank line.
   */
  const firstNonWhitespaceIndex = eatOptionalWhitespaces(nodePoints, i, endIndex)
  if (firstNonWhitespaceIndex >= endIndex) return { nextIndex: -1, state }

  // Nothing collected yet: the first non-whitespace character must open the title.
  if (state.nodePoints.length <= 0) {
    i = firstNonWhitespaceIndex
    const opener = nodePoints[i]

    switch (opener.codePoint) {
      case AsciiCodePoint.DOUBLE_QUOTE:
      case AsciiCodePoint.SINGLE_QUOTE:
      case AsciiCodePoint.OPEN_PARENTHESIS:
        state.wrapSymbol = opener.codePoint
        state.nodePoints.push(opener)
        i += 1
        break
      default:
        return { nextIndex: -1, state }
    }
  }

  if (state.wrapSymbol == null) return { nextIndex: -1, state }

  switch (state.wrapSymbol) {
    /**
     * - a sequence of zero or more characters between straight double-quote characters '"',
     *   including a '"' character only if it is backslash-escaped, or
     * - a sequence of zero or more characters between straight single-quote characters '\'',
     *   including a '\'' character only if it is backslash-escaped,
     */
    case AsciiCodePoint.DOUBLE_QUOTE:
    case AsciiCodePoint.SINGLE_QUOTE: {
      for (; i < endIndex; ++i) {
        const p = nodePoints[i]
        switch (p.codePoint) {
          case AsciiCodePoint.BACKSLASH:
            // Always keep the backslash, even when it is the last node point
            // of the fragment; the escaped node point is taken verbatim so it
            // cannot be mistaken for the closing delimiter.
            state.nodePoints.push(p)
            if (i + 1 < endIndex) state.nodePoints.push(nodePoints[i + 1])
            i += 1
            break
          case state.wrapSymbol:
            state.saturated = true
            state.nodePoints.push(p)
            return { nextIndex: i + 1, state }
          default:
            state.nodePoints.push(p)
        }
      }
      break
    }
    /**
     * a sequence of zero or more characters between matching parentheses '((...))',
     * including a '(' or ')' character only if it is backslash-escaped.
     */
    case AsciiCodePoint.OPEN_PARENTHESIS: {
      for (; i < endIndex; ++i) {
        const p = nodePoints[i]
        switch (p.codePoint) {
          case AsciiCodePoint.BACKSLASH:
            // Same escape handling as in the quoted forms above.
            state.nodePoints.push(p)
            if (i + 1 < endIndex) state.nodePoints.push(nodePoints[i + 1])
            i += 1
            break
          case AsciiCodePoint.OPEN_PARENTHESIS:
            return { nextIndex: -1, state }
          case AsciiCodePoint.CLOSE_PARENTHESIS:
            // NOTE(review): unlike the quoted forms, a ')' followed by trailing
            // spaces is rejected here — confirm against the caller whether
            // that strictness is intended.
            if (i + 1 >= endIndex || nodePoints[i + 1].codePoint === VirtualCodePoint.LINE_END) {
              state.nodePoints.push(p)
              state.saturated = true
              // Return right away (like the quoted forms) so that nothing
              // after the closing parenthesis is appended to a saturated state.
              return { nextIndex: i + 1, state }
            }
            return { nextIndex: -1, state }
          default:
            state.nodePoints.push(p)
        }
      }
      break
    }
  }

  // The fragment ended before the title was closed.
  return { nextIndex: endIndex, state }
}