@yozora/tokenizer-autolink-extension 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/{index.js → index.cjs} +5 -6
- package/lib/esm/{index.js → index.mjs} +4 -5
- package/lib/types/index.d.ts +5 -5
- package/package.json +19 -15
- package/src/index.ts +11 -0
- package/src/match.ts +114 -0
- package/src/parse.ts +35 -0
- package/src/tokenizer.ts +36 -0
- package/src/types.ts +47 -0
- package/src/util/email.ts +78 -0
- package/src/util/uri.ts +235 -0
|
@@ -281,21 +281,20 @@ const uniqueName = '@yozora/tokenizer-autolink-extension';
|
|
|
281
281
|
|
|
282
282
|
class AutolinkExtensionTokenizer extends coreTokenizer.BaseInlineTokenizer {
|
|
283
283
|
constructor(props = {}) {
|
|
284
|
-
var _a, _b;
|
|
285
284
|
super({
|
|
286
|
-
name:
|
|
287
|
-
priority:
|
|
285
|
+
name: props.name ?? uniqueName,
|
|
286
|
+
priority: props.priority ?? coreTokenizer.TokenizerPriority.LINKS,
|
|
288
287
|
});
|
|
289
|
-
this.match = match;
|
|
290
|
-
this.parse = parse;
|
|
291
288
|
}
|
|
289
|
+
match = match;
|
|
290
|
+
parse = parse;
|
|
292
291
|
}
|
|
293
292
|
|
|
294
293
|
exports.AutolinkExtensionTokenizer = AutolinkExtensionTokenizer;
|
|
295
294
|
exports.AutolinkExtensionTokenizerName = uniqueName;
|
|
296
295
|
exports.autolinkExtensionMatch = match;
|
|
297
296
|
exports.autolinkExtensionParse = parse;
|
|
298
|
-
exports
|
|
297
|
+
exports.default = AutolinkExtensionTokenizer;
|
|
299
298
|
exports.eatDomainSegment = eatDomainSegment;
|
|
300
299
|
exports.eatExtendEmailAddress = eatExtendEmailAddress;
|
|
301
300
|
exports.eatExtendedUrl = eatExtendedUrl;
|
|
@@ -277,14 +277,13 @@ const uniqueName = '@yozora/tokenizer-autolink-extension';
|
|
|
277
277
|
|
|
278
278
|
class AutolinkExtensionTokenizer extends BaseInlineTokenizer {
|
|
279
279
|
constructor(props = {}) {
|
|
280
|
-
var _a, _b;
|
|
281
280
|
super({
|
|
282
|
-
name:
|
|
283
|
-
priority:
|
|
281
|
+
name: props.name ?? uniqueName,
|
|
282
|
+
priority: props.priority ?? TokenizerPriority.LINKS,
|
|
284
283
|
});
|
|
285
|
-
this.match = match;
|
|
286
|
-
this.parse = parse;
|
|
287
284
|
}
|
|
285
|
+
match = match;
|
|
286
|
+
parse = parse;
|
|
288
287
|
}
|
|
289
288
|
|
|
290
289
|
export { AutolinkExtensionTokenizer, uniqueName as AutolinkExtensionTokenizerName, match as autolinkExtensionMatch, parse as autolinkExtensionParse, AutolinkExtensionTokenizer as default, eatDomainSegment, eatExtendEmailAddress, eatExtendedUrl, eatOptionalDomainFollows, eatValidDomain, eatWWWDomain };
|
package/lib/types/index.d.ts
CHANGED
|
@@ -58,10 +58,10 @@ declare function eatDomainSegment(nodePoints: ReadonlyArray<INodePoint>, startIn
|
|
|
58
58
|
hasUnderscore: boolean;
|
|
59
59
|
};
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
type T = LinkType;
|
|
62
|
+
type INode = Link;
|
|
63
63
|
declare const uniqueName = "@yozora/tokenizer-autolink-extension";
|
|
64
|
-
|
|
64
|
+
type AutolinkExtensionContentType = AutolinkContentType | 'uri-www';
|
|
65
65
|
interface IToken extends IPartialYastInlineToken<T> {
|
|
66
66
|
/**
|
|
67
67
|
* Autolink content type: absolute uri or email.
|
|
@@ -75,8 +75,8 @@ interface IDelimiter extends IYastTokenDelimiter {
|
|
|
75
75
|
*/
|
|
76
76
|
contentType: AutolinkExtensionContentType;
|
|
77
77
|
}
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
type IThis = ITokenizer;
|
|
79
|
+
type ITokenizerProps = Partial<IBaseInlineTokenizerProps>;
|
|
80
80
|
|
|
81
81
|
/**
|
|
82
82
|
* @see https://github.github.com/gfm/#autolinks-extension-
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-autolink-extension",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.5",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -11,34 +11,38 @@
|
|
|
11
11
|
"directory": "tokenizers/autolink-extension"
|
|
12
12
|
},
|
|
13
13
|
"homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/autolink-extension",
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
"types": "./lib/types/index.d.ts",
|
|
17
|
+
"import": "./lib/esm/index.mjs",
|
|
18
|
+
"require": "./lib/cjs/index.cjs"
|
|
19
|
+
},
|
|
20
|
+
"source": "./src/index.ts",
|
|
21
|
+
"types": "./lib/types/index.d.ts",
|
|
22
|
+
"main": "./lib/cjs/index.cjs",
|
|
23
|
+
"module": "./lib/esm/index.mjs",
|
|
18
24
|
"license": "MIT",
|
|
19
25
|
"engines": {
|
|
20
26
|
"node": ">= 16.0.0"
|
|
21
27
|
},
|
|
22
28
|
"files": [
|
|
23
29
|
"lib/",
|
|
24
|
-
"
|
|
25
|
-
"!lib/**/*.d.ts.map",
|
|
30
|
+
"src/",
|
|
26
31
|
"package.json",
|
|
27
32
|
"CHANGELOG.md",
|
|
28
33
|
"LICENSE",
|
|
29
34
|
"README.md"
|
|
30
35
|
],
|
|
31
36
|
"scripts": {
|
|
32
|
-
"build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.
|
|
33
|
-
"prebuild": "rimraf lib/",
|
|
37
|
+
"build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
|
|
34
38
|
"prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
|
|
35
|
-
"test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.
|
|
39
|
+
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
36
40
|
},
|
|
37
41
|
"dependencies": {
|
|
38
|
-
"@yozora/ast": "^2.0.
|
|
39
|
-
"@yozora/character": "^2.0.
|
|
40
|
-
"@yozora/core-tokenizer": "^2.0.
|
|
41
|
-
"@yozora/tokenizer-autolink": "^2.0.
|
|
42
|
+
"@yozora/ast": "^2.0.5",
|
|
43
|
+
"@yozora/character": "^2.0.5",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.0.5",
|
|
45
|
+
"@yozora/tokenizer-autolink": "^2.0.5"
|
|
42
46
|
},
|
|
43
|
-
"gitHead": "
|
|
47
|
+
"gitHead": "7ba3bab49fe65cf2f57082c0503af73da9356cf0"
|
|
44
48
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export * from './util/email'
|
|
2
|
+
export * from './util/uri'
|
|
3
|
+
export { match as autolinkExtensionMatch } from './match'
|
|
4
|
+
export { parse as autolinkExtensionParse } from './parse'
|
|
5
|
+
export { AutolinkExtensionTokenizer, AutolinkExtensionTokenizer as default } from './tokenizer'
|
|
6
|
+
export { uniqueName as AutolinkExtensionTokenizerName } from './types'
|
|
7
|
+
export type {
|
|
8
|
+
IThis as IAutolinkExtensionHookContext,
|
|
9
|
+
IToken as IAutolinkExtensionToken,
|
|
10
|
+
ITokenizerProps as IAutolinkExtensionTokenizerProps,
|
|
11
|
+
} from './types'
|
package/src/match.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { LinkType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import { AsciiCodePoint, isWhitespaceCharacter } from '@yozora/character'
|
|
4
|
+
import type {
|
|
5
|
+
IMatchInlineHookCreator,
|
|
6
|
+
IResultOfProcessSingleDelimiter,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
import { genFindDelimiter } from '@yozora/core-tokenizer'
|
|
9
|
+
import type {
|
|
10
|
+
AutolinkExtensionContentType,
|
|
11
|
+
ContentHelper,
|
|
12
|
+
IDelimiter,
|
|
13
|
+
IThis,
|
|
14
|
+
IToken,
|
|
15
|
+
T,
|
|
16
|
+
} from './types'
|
|
17
|
+
import { eatExtendEmailAddress } from './util/email'
|
|
18
|
+
import { eatExtendedUrl, eatWWWDomain } from './util/uri'
|
|
19
|
+
|
|
20
|
+
/**
 * Recognisers for the supported kinds of extended autolink content.
 *
 * The matcher walks this list from first to last at every candidate position
 * and stops at the first recogniser that reports a valid result, so the order
 * of the entries is significant.
 */
const helpers: ReadonlyArray<ContentHelper> = [
  // Url introduced by a scheme followed by '://'.
  {
    contentType: 'uri',
    eat: eatExtendedUrl,
  },
  // Url introduced by a 'www.' prefix.
  {
    contentType: 'uri-www',
    eat: eatWWWDomain,
  },
  // Bare email address.
  {
    contentType: 'email',
    eat: eatExtendEmailAddress,
  },
]
|
|
25
|
+
|
|
26
|
+
/**
 * Build the match-phase hooks of the autolink (extension) tokenizer.
 *
 * The returned object exposes:
 *  - `findDelimiter`: `_findDelimiter` handed to `genFindDelimiter`;
 *  - `processSingleDelimiter`: turns one delimiter into one link token.
 *
 * @param api matching context; it supplies the node points, the start index of
 *            the current block and the fallback token resolver.
 * @see https://github.github.com/gfm/#autolinks-extension-
 */
export const match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = function (api) {
  return {
    findDelimiter: () => genFindDelimiter<IDelimiter>(_findDelimiter),
    processSingleDelimiter,
  }

  /**
   * Search `[startIndex, endIndex)` for the first extended autolink.
   *
   * @returns a 'full' delimiter covering the autolink, or `null` when the
   *          range holds none.
   */
  function _findDelimiter(startIndex: number, endIndex: number): IDelimiter | null {
    const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()
    const blockStartIndex: number = api.getBlockStartIndex()

    for (let i = startIndex; i < endIndex; ++i) {
      /**
       * An extended autolink may only start at the beginning of the block,
       * after whitespace, or after one of the delimiting characters
       * '*', '_', '~' and '('. Advance to the next position that satisfies
       * this, skipping over the boundary characters themselves.
       * @see https://github.github.com/gfm/#autolinks-extension-
       */
      let candidate = i
      let afterBoundary = false
      while (candidate < endIndex) {
        const c = nodePoints[candidate].codePoint
        const isBoundary =
          isWhitespaceCharacter(c) ||
          c === AsciiCodePoint.ASTERISK ||
          c === AsciiCodePoint.UNDERSCORE ||
          c === AsciiCodePoint.TILDE ||
          c === AsciiCodePoint.OPEN_PARENTHESIS

        if (isBoundary) {
          afterBoundary = true
        } else if (afterBoundary || candidate === blockStartIndex) {
          break
        }
        candidate += 1
      }

      // No admissible start position is left in the range.
      if (candidate >= endIndex) return null
      i = candidate

      // Try the recognisers in order; the first valid one wins. While none
      // has matched, remember the smallest index reported by any of them.
      let resumeIndex: number = endIndex
      let matchedType: AutolinkExtensionContentType | null = null
      for (const helper of helpers) {
        const outcome = helper.eat(nodePoints, i, endIndex)
        if (outcome.valid) {
          matchedType = helper.contentType
          resumeIndex = outcome.nextIndex
          break
        }
        resumeIndex = Math.min(resumeIndex, outcome.nextIndex)
      }

      // Optimization: nothing matched here, so jump ahead (never backwards)
      // to just before the smallest index reported by the recognisers.
      if (matchedType == null) {
        i = Math.max(i, resumeIndex - 1)
        continue
      }

      if (resumeIndex <= endIndex) {
        return {
          type: 'full',
          startIndex: i,
          endIndex: resumeIndex,
          contentType: matchedType,
        }
      }

      // The match ran past the range; resume scanning behind it.
      i = resumeIndex - 1
    }
    return null
  }

  /**
   * Convert a delimiter into a single link token whose children are the
   * fallback tokens resolved over the delimiter's own range.
   */
  function processSingleDelimiter(
    delimiter: IDelimiter,
  ): IResultOfProcessSingleDelimiter<T, IToken> {
    const { startIndex, endIndex, contentType } = delimiter
    const token: IToken = {
      nodeType: LinkType,
      startIndex,
      endIndex,
      contentType,
      children: api.resolveFallbackTokens([], startIndex, endIndex),
    }
    return [token]
  }
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { Node } from '@yozora/ast'
|
|
2
|
+
import { LinkType } from '@yozora/ast'
|
|
3
|
+
import type { INodePoint } from '@yozora/character'
|
|
4
|
+
import { calcStringFromNodePoints } from '@yozora/character'
|
|
5
|
+
import type { IParseInlineHookCreator } from '@yozora/core-tokenizer'
|
|
6
|
+
import type { INode, IThis, IToken, T } from './types'
|
|
7
|
+
|
|
8
|
+
/**
 * Build the parse-phase hook of the autolink (extension) tokenizer.
 *
 * Every token becomes one link node. The url is the raw text covered by the
 * token, prefixed according to the token's content type:
 *  - 'email'   -> 'mailto:' is prepended;
 *  - 'uri-www' -> 'http://' is prepended;
 *  - any other content type -> the text is used unchanged.
 *
 * @param api parsing context; it supplies the node points, the inline token
 *            parser and the position calculator.
 */
export const parse: IParseInlineHookCreator<T, IToken, INode, IThis> = function (api) {
  return {
    parse: tokens =>
      tokens.map(token => {
        const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()

        // The text is taken verbatim: backslash-escapes do not work inside
        // an autolink.
        const literal = calcStringFromNodePoints(nodePoints, token.startIndex, token.endIndex)

        let url = literal
        if (token.contentType === 'email') {
          // An email address autolink points to a 'mailto:' url.
          url = 'mailto:' + literal
        } else if (token.contentType === 'uri-www') {
          // A 'www.' autolink carries no scheme, so 'http://' is supplied.
          url = 'http://' + literal
        }

        const children: Node[] = api.parseInlineTokens(token.children)

        // Attach the position only when the context asks for it.
        if (api.shouldReservePosition) {
          const positioned: INode = {
            type: LinkType,
            position: api.calcPosition(token),
            url,
            children,
          }
          return positioned
        }

        const node: INode = { type: LinkType, url, children }
        return node
      }),
  }
}
|
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IInlineTokenizer,
|
|
3
|
+
IMatchInlineHookCreator,
|
|
4
|
+
IParseInlineHookCreator,
|
|
5
|
+
} from '@yozora/core-tokenizer'
|
|
6
|
+
import { BaseInlineTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
+
import { match } from './match'
|
|
8
|
+
import { parse } from './parse'
|
|
9
|
+
import type { IDelimiter, INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
+
import { uniqueName } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Inline tokenizer for autolinks written without the surrounding '<' and '>'
 * (the GFM autolinks extension).
 *
 * The class carries no logic of its own: it forwards the name and priority to
 * `BaseInlineTokenizer` and exposes the `match` and `parse` hook creators.
 *
 * @see https://github.github.com/gfm/#autolinks-extension-
 */
export class AutolinkExtensionTokenizer
  extends BaseInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
  implements IInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
{
  /** Creator of the match-phase hooks. */
  public override readonly match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = match

  /** Creator of the parse-phase hook. */
  public override readonly parse: IParseInlineHookCreator<T, IToken, INode, IThis> = parse

  /**
   * @param props optional overrides; a missing (null or undefined) `name`
   *              falls back to `uniqueName`, a missing `priority` falls back
   *              to `TokenizerPriority.LINKS`.
   */
  /* istanbul ignore next */
  constructor(props: ITokenizerProps = {}) {
    super({
      name: props.name ?? uniqueName,
      /**
       * Autolink has the same priority as links.
       * @see https://github.github.com/gfm/#example-509
       */
      priority: props.priority ?? TokenizerPriority.LINKS,
    })
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { Link, LinkType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import type {
|
|
4
|
+
IBaseInlineTokenizerProps,
|
|
5
|
+
IPartialYastInlineToken,
|
|
6
|
+
IResultOfRequiredEater,
|
|
7
|
+
ITokenizer,
|
|
8
|
+
IYastTokenDelimiter,
|
|
9
|
+
} from '@yozora/core-tokenizer'
|
|
10
|
+
import type { AutolinkContentType } from '@yozora/tokenizer-autolink'
|
|
11
|
+
|
|
12
|
+
/** Node type handled by this tokenizer. */
export type T = LinkType

/** AST node produced by this tokenizer. */
export type INode = Link

/** Package name, used as the default name of the tokenizer. */
export const uniqueName = '@yozora/tokenizer-autolink-extension'

/**
 * Kinds of content an extended autolink can hold: every kind known to the
 * plain autolink tokenizer, plus 'uri-www' for links introduced by 'www.'.
 */
export type AutolinkExtensionContentType = AutolinkContentType | 'uri-www'

/** Token emitted by the match phase. */
export interface IToken extends IPartialYastInlineToken<T> {
  /**
   * Kind of content the autolink holds; the parse phase uses it to choose
   * the url prefix.
   */
  contentType: AutolinkExtensionContentType
}

/** Delimiter emitted by the match phase; it always spans a whole autolink. */
export interface IDelimiter extends IYastTokenDelimiter {
  type: 'full'
  /**
   * Kind of content found between `startIndex` and `endIndex`.
   */
  contentType: AutolinkExtensionContentType
}

/** Type of the hook context parameter shared by the match and parse hooks. */
export type IThis = ITokenizer

/** Constructor options of the tokenizer; every field is optional. */
export type ITokenizerProps = Partial<IBaseInlineTokenizerProps>

/**
 * Recogniser that tries to consume one kind of autolink content from
 * `nodePoints`, starting at `startIndex` and never reading at or beyond
 * `endIndex`.
 */
export type ContentEater = (
  nodePoints: readonly INodePoint[],
  startIndex: number,
  endIndex: number,
) => IResultOfRequiredEater

/** Pairs a recogniser with the content type it recognises. */
export interface ContentHelper {
  contentType: AutolinkExtensionContentType
  eat: ContentEater
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import { AsciiCodePoint, isAlphanumeric } from '@yozora/character'
|
|
3
|
+
import type { IResultOfRequiredEater } from '@yozora/core-tokenizer'
|
|
4
|
+
|
|
5
|
+
/**
 * Try to consume an extended email autolink starting at `startIndex`.
 *
 * The address is accepted when it is made of, in order:
 *
 *  - an alphanumeric character, followed by zero or more characters which
 *    are alphanumeric, or '.', '-', '_', or '+';
 *  - an '@' symbol;
 *  - an alphanumeric character, followed by characters which are
 *    alphanumeric, '-', '_' or '.'. There must be at least one period, and
 *    the last character must not be one of '-' or '_'.
 *
 * A single trailing '.' is left out of the address.
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character to examine
 * @param endIndex   exclusive upper bound of the scan
 * @returns `valid` tells whether an address was recognised; `nextIndex` is
 *          the index just past the address when valid, otherwise the index
 *          at which the attempt was abandoned (it may exceed `endIndex`).
 * @see https://github.github.com/gfm/#extended-email-autolink
 */
export function eatExtendEmailAddress(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  const reject = (nextIndex: number): IResultOfRequiredEater => ({ valid: false, nextIndex })
  const isLocalPartCharacter = (c: number): boolean =>
    isAlphanumeric(c) ||
    c === AsciiCodePoint.DOT ||
    c === AsciiCodePoint.MINUS_SIGN ||
    c === AsciiCodePoint.UNDERSCORE ||
    c === AsciiCodePoint.PLUS_SIGN

  // The address has to open with an alphanumeric character.
  if (startIndex >= endIndex || !isAlphanumeric(nodePoints[startIndex].codePoint)) {
    return reject(startIndex + 1)
  }

  // Consume the remainder of the local part.
  let cursor = startIndex + 1
  while (cursor < endIndex && isLocalPartCharacter(nodePoints[cursor].codePoint)) {
    cursor += 1
  }

  // Match an '@' symbol that is followed by an alphanumeric character and by
  // at least one further character.
  const hasAtSign =
    cursor + 2 < endIndex &&
    nodePoints[cursor].codePoint === AsciiCodePoint.AT_SIGN &&
    isAlphanumeric(nodePoints[cursor + 1].codePoint)
  if (!hasAtSign) return reject(cursor + 1)

  // Consume the domain part, counting its periods on the way. The '@' and
  // the character after it have been checked already.
  let countOfPeriod = 0
  cursor += 2
  while (cursor < endIndex) {
    const c = nodePoints[cursor].codePoint
    if (c === AsciiCodePoint.DOT) {
      countOfPeriod += 1
    } else if (
      !isAlphanumeric(c) &&
      c !== AsciiCodePoint.MINUS_SIGN &&
      c !== AsciiCodePoint.UNDERSCORE
    ) {
      break
    }
    cursor += 1
  }

  // '.', '-', and '_' can occur on both sides of the '@', but only '.' may
  // occur at the end of the email address, in which case it will not be
  // considered part of the address.
  const lastCharacter = nodePoints[cursor - 1].codePoint
  if (lastCharacter === AsciiCodePoint.MINUS_SIGN || lastCharacter === AsciiCodePoint.UNDERSCORE) {
    return reject(cursor)
  }
  if (lastCharacter === AsciiCodePoint.DOT) {
    cursor -= 1
    countOfPeriod -= 1
  }

  // There must be at least one period.
  return { valid: countOfPeriod > 0, nextIndex: cursor }
}
|
package/src/util/uri.ts
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import {
|
|
3
|
+
AsciiCodePoint,
|
|
4
|
+
isAlphanumeric,
|
|
5
|
+
isPunctuationCharacter,
|
|
6
|
+
isWhitespaceCharacter,
|
|
7
|
+
} from '@yozora/character'
|
|
8
|
+
import type { IResultOfOptionalEater, IResultOfRequiredEater } from '@yozora/core-tokenizer'
|
|
9
|
+
import { eatAutolinkSchema } from '@yozora/tokenizer-autolink'
|
|
10
|
+
|
|
11
|
+
/**
 * Try to consume an extended url autolink starting at `startIndex`: a scheme
 * recognised by `eatAutolinkSchema`, the separator '://', a valid domain, and
 * then the optional characters allowed by extended autolink path validation.
 *
 * NOTE(review): the GFM spec names the schemes 'http://' and 'https://';
 * which schemes `eatAutolinkSchema` accepts is not visible here, confirm.
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character to examine
 * @param endIndex   exclusive upper bound of the scan
 * @returns the result of the domain check, with `nextIndex` moved past the
 *          characters that follow the domain; or an invalid result pointing
 *          one past the end of the scheme when the scheme or '://' is missing.
 * @see https://github.github.com/gfm/#extended-url-autolink
 */
export function eatExtendedUrl(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  const schema = eatAutolinkSchema(nodePoints, startIndex, endIndex)
  const schemeEnd = schema.nextIndex

  // '://' must follow the scheme, with at least one more character after it.
  const hasSeparator =
    schema.valid &&
    schemeEnd + 3 < endIndex &&
    nodePoints[schemeEnd].codePoint === AsciiCodePoint.COLON &&
    nodePoints[schemeEnd + 1].codePoint === AsciiCodePoint.SLASH &&
    nodePoints[schemeEnd + 2].codePoint === AsciiCodePoint.SLASH
  if (!hasSeparator) {
    return { valid: false, nextIndex: schemeEnd + 1 }
  }

  // The trailing part is consumed whether or not the domain is valid, so
  // `nextIndex` always points behind it.
  const domain = eatValidDomain(nodePoints, schemeEnd + 3, endIndex)
  domain.nextIndex = eatOptionalDomainFollows(nodePoints, domain.nextIndex, endIndex)
  return domain
}
|
|
39
|
+
|
|
40
|
+
/**
 * Try to consume an extended www autolink starting at `startIndex`: the text
 * 'www.' (each 'w' in either case), a valid domain, and then the optional
 * characters allowed by extended autolink path validation.
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character to examine
 * @param endIndex   exclusive upper bound of the scan
 * @returns the result of the domain check, with `nextIndex` moved past the
 *          characters that follow the domain; or an invalid result pointing
 *          at the end of the leading domain segment when the text does not
 *          start with 'www.'.
 */
export function eatWWWDomain(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  const prefix = eatDomainSegment(nodePoints, startIndex, endIndex)
  const prefixEnd = prefix.nextIndex

  // The leading segment must be exactly three characters long and be
  // followed by a period.
  const isThreeCharactersBeforeDot =
    prefix.valid &&
    prefixEnd < endIndex &&
    nodePoints[prefixEnd].codePoint === AsciiCodePoint.DOT &&
    prefixEnd - startIndex === 3
  if (!isThreeCharactersBeforeDot) {
    return { valid: false, nextIndex: prefixEnd }
  }

  // Each of the three characters must be 'w' or 'W'.
  const isAllW = nodePoints
    .slice(startIndex, prefixEnd)
    .every(
      p => p.codePoint === AsciiCodePoint.LOWERCASE_W || p.codePoint === AsciiCodePoint.UPPERCASE_W,
    )
  if (!isAllW) {
    return { valid: false, nextIndex: prefixEnd }
  }

  // The trailing part is consumed whether or not the domain is valid, so
  // `nextIndex` always points behind it.
  const domain = eatValidDomain(nodePoints, prefixEnd + 1, endIndex)
  domain.nextIndex = eatOptionalDomainFollows(nodePoints, domain.nextIndex, endIndex)
  return domain
}
|
|
70
|
+
|
|
71
|
+
/**
 * Consume the optional text that follows a valid domain.
 *
 * After a valid domain, zero or more non-space non-'<' characters may follow.
 * The end of that run is then adjusted in three steps:
 *
 *  1. trailing punctuation is dropped;
 *  2. closing parentheses dropped by step 1 are restored while they balance
 *     opening parentheses found earlier in the run;
 *  3. a trailing '&' + alphanumerics + ';' (what resembles an entity
 *     reference) is dropped.
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character after the domain
 * @param endIndex   exclusive upper bound of the scan
 * @returns the index just past the last character kept in the autolink
 * @see https://github.github.com/gfm/#extended-autolink-path-validation
 */
export function eatOptionalDomainFollows(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfOptionalEater {
  // Characters dropped from the end of the run: whatever
  // `isPunctuationCharacter` accepts, plus the characters named by the spec
  // ('?', '!', '.', ',', ':', '*', '_', and '~').
  const isTrailingPunctuation = (c: number): boolean =>
    isPunctuationCharacter(c) ||
    c === AsciiCodePoint.QUESTION_MARK ||
    c === AsciiCodePoint.EXCLAMATION_MARK ||
    c === AsciiCodePoint.DOT ||
    c === AsciiCodePoint.COMMA ||
    c === AsciiCodePoint.COLON ||
    c === AsciiCodePoint.ASTERISK ||
    c === AsciiCodePoint.UNDERSCORE ||
    c === AsciiCodePoint.TILDE

  // Find the end of the run of non-space non-'<' characters.
  let runEnd = startIndex
  while (runEnd < endIndex) {
    const c = nodePoints[runEnd].codePoint
    if (isWhitespaceCharacter(c) || c === AsciiCodePoint.OPEN_ANGLE) break
    runEnd += 1
  }

  // `last` is the index of the last character kept so far (inclusive); it is
  // `startIndex - 1` when nothing is kept.
  let last = runEnd - 1
  while (last >= startIndex && isTrailingPunctuation(nodePoints[last].codePoint)) {
    last -= 1
  }

  /**
   * When an autolink ends in ')', we scan the entire autolink for the total
   * number of parentheses. If there is a greater number of closing parentheses
   * than opening ones, we don’t consider the unmatched trailing parentheses
   * part of the autolink, in order to facilitate including an autolink inside
   * a parenthesis.
   * @see https://github.github.com/gfm/#example-624
   * @see https://github.github.com/gfm/#example-625
   */
  if (
    last >= startIndex &&
    last + 1 < endIndex &&
    nodePoints[last + 1].codePoint === AsciiCodePoint.CLOSE_PARENTHESIS
  ) {
    // Net count of opening parentheses within the kept text.
    let countOfOpenParenthesis = 0
    for (let k = startIndex; k < last; k += 1) {
      const c = nodePoints[k].codePoint
      if (c === AsciiCodePoint.OPEN_PARENTHESIS) countOfOpenParenthesis += 1
      else if (c === AsciiCodePoint.CLOSE_PARENTHESIS) countOfOpenParenthesis -= 1
    }

    if (countOfOpenParenthesis > 0) {
      // Restore the ')' right behind the kept text, then every directly
      // following ')' for as long as opening parentheses remain unmatched.
      let cursor = last + 2
      countOfOpenParenthesis -= 1
      while (
        cursor < endIndex &&
        countOfOpenParenthesis > 0 &&
        nodePoints[cursor].codePoint === AsciiCodePoint.CLOSE_PARENTHESIS
      ) {
        countOfOpenParenthesis -= 1
        cursor += 1
      }
      last = cursor - 1
    }
  }

  /**
   * If an autolink ends in a semicolon (;), we check to see if it appears to
   * resemble an entity reference; if the preceding text is & followed by one
   * or more alphanumeric characters. If so, it is excluded from the autolink.
   * @see https://github.github.com/gfm/#example-626
   */
  if (last + 1 < endIndex && nodePoints[last + 1].codePoint === AsciiCodePoint.SEMICOLON) {
    // Walk back over the alphanumeric characters that precede the ';'.
    let k = last
    while (k >= startIndex && isAlphanumeric(nodePoints[k].codePoint)) {
      k -= 1
    }
    if (k >= startIndex && nodePoints[k].codePoint === AsciiCodePoint.AMPERSAND) {
      last = k - 1
    }
  }

  return last + 1
}
|
|
172
|
+
|
|
173
|
+
/**
 * Try to consume a valid domain starting at `startIndex`.
 *
 * A valid domain consists of segments of alphanumeric characters,
 * underscores (_) and hyphens (-) separated by periods (.). There must be at
 * least one period, and no underscores may be present in the last two
 * segments of the domain.
 *
 * A domain whose first segment runs up to `endIndex` is rejected as well.
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character of the domain
 * @param endIndex   exclusive upper bound of the scan
 * @returns `valid` tells whether the consumed text is a valid domain;
 *          `nextIndex` is the index just past the last consumed segment.
 * @see https://github.github.com/gfm/#valid-domain
 */
export function eatValidDomain(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  const firstSegment = eatDomainSegment(nodePoints, startIndex, endIndex)
  if (!firstSegment.valid || firstSegment.nextIndex >= endIndex) {
    return { valid: false, nextIndex: firstSegment.nextIndex }
  }

  let nextIndex = firstSegment.nextIndex
  let countOfPeriod = 0

  // Two-bit window over the most recent segments: bit 1 is set when the
  // latest segment holds an underscore, bit 0 when the one before it does.
  // Shifting right for every new segment forgets anything older.
  let countOfUnderscoreOfLastTwoSegment = firstSegment.hasUnderscore ? 2 : 0

  // Consume further segments for as long as each is introduced by a period.
  while (nextIndex < endIndex && nodePoints[nextIndex].codePoint === AsciiCodePoint.DOT) {
    const segment = eatDomainSegment(nodePoints, nextIndex + 1, endIndex)

    // A period that is not followed by a segment is not part of the domain.
    if (!segment.valid) break

    nextIndex = segment.nextIndex
    countOfPeriod += 1
    countOfUnderscoreOfLastTwoSegment >>>= 1
    countOfUnderscoreOfLastTwoSegment |= segment.hasUnderscore ? 2 : 0
  }

  // There must be at least one period, and no underscores may be present in
  // the last two segments of the domain. Failing either rule is enough to
  // reject the domain.
  if (countOfPeriod <= 0 || countOfUnderscoreOfLastTwoSegment !== 0) {
    return { valid: false, nextIndex }
  }
  return { valid: true, nextIndex }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Consume one domain segment starting at `startIndex`: the longest run of
 * alphanumeric characters, underscores (_) and hyphens (-).
 *
 * @param nodePoints code points of the text being scanned
 * @param startIndex index of the first character of the segment
 * @param endIndex   exclusive upper bound of the scan
 * @returns `valid` is true when at least one character was consumed;
 *          `nextIndex` is the index just past the run; `hasUnderscore` tells
 *          whether the run contains an underscore.
 * @see https://github.github.com/gfm/#valid-domain
 */
export function eatDomainSegment(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater & { hasUnderscore: boolean } {
  let hasUnderscore = false
  let cursor = startIndex
  while (cursor < endIndex) {
    const c = nodePoints[cursor].codePoint
    if (c === AsciiCodePoint.UNDERSCORE) {
      hasUnderscore = true
    } else if (!isAlphanumeric(c) && c !== AsciiCodePoint.MINUS_SIGN) {
      // First character that cannot belong to a segment.
      break
    }
    cursor += 1
  }

  // An empty run is not a segment.
  return { valid: cursor > startIndex, nextIndex: cursor, hasUnderscore }
}
|