@yozora/tokenizer-break 2.0.3 → 2.0.5-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/{index.js → index.cjs} +5 -6
- package/lib/esm/{index.js → index.mjs} +4 -5
- package/lib/types/index.d.ts +5 -5
- package/package.json +18 -14
- package/src/index.ts +9 -0
- package/src/match.ts +104 -0
- package/src/parse.ts +15 -0
- package/src/tokenizer.ts +33 -0
- package/src/types.ts +39 -0
|
@@ -86,18 +86,17 @@ const parse = function (api) {
|
|
|
86
86
|
|
|
87
87
|
class BreakTokenizer extends coreTokenizer.BaseInlineTokenizer {
|
|
88
88
|
constructor(props = {}) {
|
|
89
|
-
var _a, _b;
|
|
90
89
|
super({
|
|
91
|
-
name:
|
|
92
|
-
priority:
|
|
90
|
+
name: props.name ?? uniqueName,
|
|
91
|
+
priority: props.priority ?? coreTokenizer.TokenizerPriority.SOFT_INLINE,
|
|
93
92
|
});
|
|
94
|
-
this.match = match;
|
|
95
|
-
this.parse = parse;
|
|
96
93
|
}
|
|
94
|
+
match = match;
|
|
95
|
+
parse = parse;
|
|
97
96
|
}
|
|
98
97
|
|
|
99
98
|
exports.BreakTokenizer = BreakTokenizer;
|
|
100
99
|
exports.BreakTokenizerName = uniqueName;
|
|
101
100
|
exports.breakMatch = match;
|
|
102
101
|
exports.breakParse = parse;
|
|
103
|
-
exports
|
|
102
|
+
exports.default = BreakTokenizer;
|
|
@@ -82,14 +82,13 @@ const parse = function (api) {
|
|
|
82
82
|
|
|
83
83
|
class BreakTokenizer extends BaseInlineTokenizer {
|
|
84
84
|
constructor(props = {}) {
|
|
85
|
-
var _a, _b;
|
|
86
85
|
super({
|
|
87
|
-
name:
|
|
88
|
-
priority:
|
|
86
|
+
name: props.name ?? uniqueName,
|
|
87
|
+
priority: props.priority ?? TokenizerPriority.SOFT_INLINE,
|
|
89
88
|
});
|
|
90
|
-
this.match = match;
|
|
91
|
-
this.parse = parse;
|
|
92
89
|
}
|
|
90
|
+
match = match;
|
|
91
|
+
parse = parse;
|
|
93
92
|
}
|
|
94
93
|
|
|
95
94
|
export { BreakTokenizer, uniqueName as BreakTokenizerName, match as breakMatch, parse as breakParse, BreakTokenizer as default };
|
package/lib/types/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { IPartialYastInlineToken, ITokenizer, IBaseInlineTokenizerProps, IYastTokenDelimiter, IMatchInlineHookCreator, IParseInlineHookCreator, BaseInlineTokenizer, IInlineTokenizer } from '@yozora/core-tokenizer';
|
|
2
2
|
import { BreakType, Break } from '@yozora/ast';
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
type T = BreakType;
|
|
5
|
+
type INode = Break;
|
|
6
6
|
declare const uniqueName = "@yozora/tokenizer-break";
|
|
7
|
-
|
|
7
|
+
type IToken = IPartialYastInlineToken<T>;
|
|
8
8
|
interface IDelimiter extends IYastTokenDelimiter {
|
|
9
9
|
type: 'full';
|
|
10
10
|
/**
|
|
@@ -12,8 +12,8 @@ interface IDelimiter extends IYastTokenDelimiter {
|
|
|
12
12
|
*/
|
|
13
13
|
markerType: BreakTokenMarkerType;
|
|
14
14
|
}
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
type IThis = ITokenizer;
|
|
16
|
+
type ITokenizerProps = Partial<IBaseInlineTokenizerProps>;
|
|
17
17
|
/**
|
|
18
18
|
* Line break marker type.
|
|
19
19
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-break",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.5-alpha.0",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -11,33 +11,37 @@
|
|
|
11
11
|
"directory": "tokenizers/break"
|
|
12
12
|
},
|
|
13
13
|
"homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/break",
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
"types": "./lib/types/index.d.ts",
|
|
17
|
+
"import": "./lib/esm/index.mjs",
|
|
18
|
+
"require": "./lib/cjs/index.cjs"
|
|
19
|
+
},
|
|
20
|
+
"source": "./src/index.ts",
|
|
21
|
+
"types": "./lib/types/index.d.ts",
|
|
22
|
+
"main": "./lib/cjs/index.cjs",
|
|
23
|
+
"module": "./lib/esm/index.mjs",
|
|
18
24
|
"license": "MIT",
|
|
19
25
|
"engines": {
|
|
20
26
|
"node": ">= 16.0.0"
|
|
21
27
|
},
|
|
22
28
|
"files": [
|
|
23
29
|
"lib/",
|
|
24
|
-
"
|
|
25
|
-
"!lib/**/*.d.ts.map",
|
|
30
|
+
"src/",
|
|
26
31
|
"package.json",
|
|
27
32
|
"CHANGELOG.md",
|
|
28
33
|
"LICENSE",
|
|
29
34
|
"README.md"
|
|
30
35
|
],
|
|
31
36
|
"scripts": {
|
|
32
|
-
"build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.
|
|
33
|
-
"prebuild": "rimraf lib/",
|
|
37
|
+
"build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
|
|
34
38
|
"prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
|
|
35
|
-
"test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.
|
|
39
|
+
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
36
40
|
},
|
|
37
41
|
"dependencies": {
|
|
38
|
-
"@yozora/ast": "^2.0.
|
|
39
|
-
"@yozora/character": "^2.0.
|
|
40
|
-
"@yozora/core-tokenizer": "^2.0.
|
|
42
|
+
"@yozora/ast": "^2.0.5-alpha.0",
|
|
43
|
+
"@yozora/character": "^2.0.5-alpha.0",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.0.5-alpha.0"
|
|
41
45
|
},
|
|
42
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "8bf941fe4ef82947165b0f3cc123cd493665e13b"
|
|
43
47
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { match as breakMatch } from './match'
|
|
2
|
+
export { parse as breakParse } from './parse'
|
|
3
|
+
export { BreakTokenizer, BreakTokenizer as default } from './tokenizer'
|
|
4
|
+
export { uniqueName as BreakTokenizerName } from './types'
|
|
5
|
+
export type {
|
|
6
|
+
IThis as IBreakHookContext,
|
|
7
|
+
IToken as IBreakToken,
|
|
8
|
+
ITokenizerProps as IBreakTokenizerProps,
|
|
9
|
+
} from './types'
|
package/src/match.ts
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { BreakType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import { AsciiCodePoint, VirtualCodePoint } from '@yozora/character'
|
|
4
|
+
import type {
|
|
5
|
+
IMatchInlineHookCreator,
|
|
6
|
+
IResultOfProcessSingleDelimiter,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
import { genFindDelimiter } from '@yozora/core-tokenizer'
|
|
9
|
+
import { BreakTokenMarkerType } from './types'
|
|
10
|
+
import type { IDelimiter, IThis, IToken, T } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Create the match-phase hooks for line breaks.
 *
 * A line ending is reported as a break delimiter when it is immediately
 * preceded by either
 *
 *   - a run of backslashes of odd length, so that the last backslash is not
 *     consumed as the escaped half of a `\\` pair (the delimiter then covers
 *     only that last backslash), or
 *   - a run of two or more spaces (the delimiter then covers the whole run).
 *
 * Runs are only measured back to the `startIndex` of the searched range.
 * Every other line ending is skipped by this hook.
 *
 * Background from the GFM spec: a line break (not in a code span or HTML tag)
 * that is preceded by two or more spaces and does not occur at the end of a
 * block is parsed as a hard line break (rendered in HTML as a <br /> tag); a
 * backslash before the line ending may be used instead of the two spaces.
 * A regular line break that is not preceded by two or more spaces or a
 * backslash is parsed as a softbreak. (A softbreak may be rendered in HTML
 * either as a line ending or as a space. The result will be the same in
 * browsers.)
 *
 * @see https://github.github.com/gfm/#hard-line-breaks
 * @see https://github.github.com/gfm/#soft-line-breaks
 * @see https://github.github.com/gfm/#example-654
 * @see https://github.github.com/gfm/#example-655
 * @see https://github.github.com/gfm/#example-656
 * @see https://github.github.com/gfm/#example-657
 * @see https://github.com/syntax-tree/mdast#break
 */
export const match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = function (api) {
  /**
   * Find the first break delimiter inside `[startIndex, endIndex)`.
   *
   * @param startIndex  first node point index of the searched range
   * @param endIndex    index just past the searched range
   * @returns the delimiter, or `null` when the range contains no break marker
   */
  function _findDelimiter(startIndex: number, endIndex: number): IDelimiter | null {
    const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()

    // A marker character must sit in front of the line ending, hence the scan
    // starts one position after `startIndex`.
    for (let lineEnd = startIndex + 1; lineEnd < endIndex; lineEnd += 1) {
      if (nodePoints[lineEnd].codePoint !== VirtualCodePoint.LINE_END) continue

      const marker = nodePoints[lineEnd - 1].codePoint
      const isBackslash = marker === AsciiCodePoint.BACKSLASH
      if (!isBackslash && marker !== AsciiCodePoint.SPACE) continue

      // Walk left over the run of identical marker characters that ends right
      // before the line ending, never crossing `startIndex`.
      let runStart = lineEnd - 1
      while (runStart > startIndex && nodePoints[runStart - 1].codePoint === marker) {
        runStart -= 1
      }
      const runLength = lineEnd - runStart

      if (isBackslash) {
        // An even number of backslashes pairs up completely, leaving no
        // backslash to act as the break marker.
        if ((runLength & 1) === 0) continue
        return {
          type: 'full',
          markerType: BreakTokenMarkerType.BACKSLASH,
          startIndex: lineEnd - 1,
          endIndex: lineEnd,
        }
      }

      // A single trailing space is not a break marker.
      if (runLength < 2) continue
      return {
        type: 'full',
        markerType: BreakTokenMarkerType.MORE_THAN_TWO_SPACES,
        startIndex: runStart,
        endIndex: lineEnd,
      }
    }

    return null
  }

  /**
   * Turn a break delimiter into a single break token spanning the same range.
   */
  function processSingleDelimiter(
    delimiter: IDelimiter,
  ): IResultOfProcessSingleDelimiter<T, IToken> {
    const { startIndex, endIndex } = delimiter
    const token: IToken = { nodeType: BreakType, startIndex, endIndex }
    return [token]
  }

  return {
    findDelimiter: () => genFindDelimiter<IDelimiter>(_findDelimiter),
    processSingleDelimiter,
  }
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { BreakType } from '@yozora/ast'
|
|
2
|
+
import type { IParseInlineHookCreator } from '@yozora/core-tokenizer'
|
|
3
|
+
import type { INode, IThis, IToken, T } from './types'
|
|
4
|
+
|
|
5
|
+
/**
 * Create the parse-phase hook for line breaks.
 *
 * Every break token is mapped to one `Break` node. The node carries a
 * `position` (computed through `api.calcPosition`) only while
 * `api.shouldReservePosition` is truthy.
 */
export const parse: IParseInlineHookCreator<T, IToken, INode, IThis> = function (api) {
  return {
    parse: tokens =>
      tokens.map((token): INode => {
        if (api.shouldReservePosition) {
          return { type: BreakType, position: api.calcPosition(token) }
        }
        return { type: BreakType }
      }),
  }
}
|
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IInlineTokenizer,
|
|
3
|
+
IMatchInlineHookCreator,
|
|
4
|
+
IParseInlineHookCreator,
|
|
5
|
+
} from '@yozora/core-tokenizer'
|
|
6
|
+
import { BaseInlineTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
+
import { match } from './match'
|
|
8
|
+
import { parse } from './parse'
|
|
9
|
+
import { uniqueName } from './types'
|
|
10
|
+
import type { IDelimiter, INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Inline tokenizer for line breaks.
 *
 * The matching and parsing logic lives in the `match` and `parse` hook
 * creators; this class only wires them to the base inline tokenizer and
 * supplies the default name and priority.
 *
 * @see https://github.github.com/gfm/#hard-line-breaks
 * @see https://github.github.com/gfm/#soft-line-breaks
 * @see https://github.com/syntax-tree/mdast#break
 */
export class BreakTokenizer
  extends BaseInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
  implements IInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
{
  /** Hook creator used during the match phase. */
  public override readonly match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = match

  /** Hook creator used during the parse phase. */
  public override readonly parse: IParseInlineHookCreator<T, IToken, INode, IThis> = parse

  /**
   * @param props  optional overrides; `name` falls back to `uniqueName` and
   *               `priority` to `TokenizerPriority.SOFT_INLINE` when they are
   *               `null` or `undefined`.
   */
  /* istanbul ignore next */
  constructor(props: ITokenizerProps = {}) {
    super({
      name: props.name ?? uniqueName,
      priority: props.priority ?? TokenizerPriority.SOFT_INLINE,
    })
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Break, BreakType } from '@yozora/ast'
|
|
2
|
+
import type {
|
|
3
|
+
IBaseInlineTokenizerProps,
|
|
4
|
+
IPartialYastInlineToken,
|
|
5
|
+
ITokenizer,
|
|
6
|
+
IYastTokenDelimiter,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
|
|
9
|
+
export type T = BreakType
|
|
10
|
+
export type INode = Break
|
|
11
|
+
export const uniqueName = '@yozora/tokenizer-break'
|
|
12
|
+
|
|
13
|
+
export type IToken = IPartialYastInlineToken<T>
|
|
14
|
+
|
|
15
|
+
export interface IDelimiter extends IYastTokenDelimiter {
|
|
16
|
+
type: 'full'
|
|
17
|
+
/**
|
|
18
|
+
* Which marker produced this break: a trailing backslash or trailing spaces.
|
|
19
|
+
*/
|
|
20
|
+
markerType: BreakTokenMarkerType
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export type IThis = ITokenizer
|
|
24
|
+
|
|
25
|
+
export type ITokenizerProps = Partial<IBaseInlineTokenizerProps>
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Line break marker type.
|
|
29
|
+
*/
|
|
30
|
+
export enum BreakTokenMarkerType {
|
|
31
|
+
/**
|
|
32
|
+
* Backslash at the end of the line
|
|
33
|
+
*/
|
|
34
|
+
BACKSLASH = 'backslash',
|
|
35
|
+
/**
|
|
36
|
+
* Two or more spaces at the end of the line
|
|
37
|
+
*/
|
|
38
|
+
MORE_THAN_TWO_SPACES = 'more-than-two-spaces',
|
|
39
|
+
}
|