@yozora/tokenizer-autolink 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/{index.js → index.cjs} +5 -6
- package/lib/esm/{index.js → index.mjs} +4 -5
- package/lib/types/index.d.ts +5 -5
- package/package.json +18 -14
- package/src/index.ts +12 -0
- package/src/match.ts +79 -0
- package/src/parse.ts +31 -0
- package/src/tokenizer.ts +36 -0
- package/src/types.ts +46 -0
- package/src/util/email.ts +99 -0
- package/src/util/uri.ts +84 -0
|
@@ -178,21 +178,20 @@ const uniqueName = '@yozora/tokenizer-autolink';
|
|
|
178
178
|
|
|
179
179
|
class AutolinkTokenizer extends coreTokenizer.BaseInlineTokenizer {
|
|
180
180
|
constructor(props = {}) {
|
|
181
|
-
var _a, _b;
|
|
182
181
|
super({
|
|
183
|
-
name:
|
|
184
|
-
priority:
|
|
182
|
+
name: props.name ?? uniqueName,
|
|
183
|
+
priority: props.priority ?? coreTokenizer.TokenizerPriority.ATOMIC,
|
|
185
184
|
});
|
|
186
|
-
this.match = match;
|
|
187
|
-
this.parse = parse;
|
|
188
185
|
}
|
|
186
|
+
match = match;
|
|
187
|
+
parse = parse;
|
|
189
188
|
}
|
|
190
189
|
|
|
191
190
|
exports.AutolinkTokenizer = AutolinkTokenizer;
|
|
192
191
|
exports.AutolinkTokenizerName = uniqueName;
|
|
193
192
|
exports.autolinkMatch = match;
|
|
194
193
|
exports.autolinkParse = parse;
|
|
195
|
-
exports
|
|
194
|
+
exports.default = AutolinkTokenizer;
|
|
196
195
|
exports.eatAbsoluteUri = eatAbsoluteUri;
|
|
197
196
|
exports.eatAutolinkSchema = eatAutolinkSchema;
|
|
198
197
|
exports.eatEmailAddress = eatEmailAddress;
|
|
@@ -174,14 +174,13 @@ const uniqueName = '@yozora/tokenizer-autolink';
|
|
|
174
174
|
|
|
175
175
|
class AutolinkTokenizer extends BaseInlineTokenizer {
|
|
176
176
|
constructor(props = {}) {
|
|
177
|
-
var _a, _b;
|
|
178
177
|
super({
|
|
179
|
-
name:
|
|
180
|
-
priority:
|
|
178
|
+
name: props.name ?? uniqueName,
|
|
179
|
+
priority: props.priority ?? TokenizerPriority.ATOMIC,
|
|
181
180
|
});
|
|
182
|
-
this.match = match;
|
|
183
|
-
this.parse = parse;
|
|
184
181
|
}
|
|
182
|
+
match = match;
|
|
183
|
+
parse = parse;
|
|
185
184
|
}
|
|
186
185
|
|
|
187
186
|
export { AutolinkTokenizer, uniqueName as AutolinkTokenizerName, match as autolinkMatch, parse as autolinkParse, AutolinkTokenizer as default, eatAbsoluteUri, eatAutolinkSchema, eatEmailAddress };
|
package/lib/types/index.d.ts
CHANGED
|
@@ -35,9 +35,9 @@ declare function eatAbsoluteUri(nodePoints: ReadonlyArray<INodePoint>, startInde
|
|
|
35
35
|
*/
|
|
36
36
|
declare function eatAutolinkSchema(nodePoints: ReadonlyArray<INodePoint>, startIndex: number, endIndex: number): IResultOfRequiredEater;
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
type AutolinkContentType = 'uri' | 'email';
|
|
39
|
+
type T = LinkType;
|
|
40
|
+
type INode = Link;
|
|
41
41
|
declare const uniqueName = "@yozora/tokenizer-autolink";
|
|
42
42
|
interface IToken extends IPartialYastInlineToken<T> {
|
|
43
43
|
/**
|
|
@@ -52,8 +52,8 @@ interface IDelimiter extends IYastTokenDelimiter {
|
|
|
52
52
|
*/
|
|
53
53
|
contentType: AutolinkContentType;
|
|
54
54
|
}
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
type IThis = ITokenizer;
|
|
56
|
+
type ITokenizerProps = Partial<IBaseInlineTokenizerProps>;
|
|
57
57
|
|
|
58
58
|
/**
|
|
59
59
|
* Autolinks are absolute URIs and email addresses inside '<' and '>'.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yozora/tokenizer-autolink",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.5",
|
|
4
4
|
"author": {
|
|
5
5
|
"name": "guanghechen",
|
|
6
6
|
"url": "https://github.com/guanghechen/"
|
|
@@ -11,33 +11,37 @@
|
|
|
11
11
|
"directory": "tokenizers/autolink"
|
|
12
12
|
},
|
|
13
13
|
"homepage": "https://github.com/yozorajs/yozora/tree/release-2.x.x/tokenizers/autolink",
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
"types": "./lib/types/index.d.ts",
|
|
17
|
+
"import": "./lib/esm/index.mjs",
|
|
18
|
+
"require": "./lib/cjs/index.cjs"
|
|
19
|
+
},
|
|
20
|
+
"source": "./src/index.ts",
|
|
21
|
+
"types": "./lib/types/index.d.ts",
|
|
22
|
+
"main": "./lib/cjs/index.cjs",
|
|
23
|
+
"module": "./lib/esm/index.mjs",
|
|
18
24
|
"license": "MIT",
|
|
19
25
|
"engines": {
|
|
20
26
|
"node": ">= 16.0.0"
|
|
21
27
|
},
|
|
22
28
|
"files": [
|
|
23
29
|
"lib/",
|
|
24
|
-
"
|
|
25
|
-
"!lib/**/*.d.ts.map",
|
|
30
|
+
"src/",
|
|
26
31
|
"package.json",
|
|
27
32
|
"CHANGELOG.md",
|
|
28
33
|
"LICENSE",
|
|
29
34
|
"README.md"
|
|
30
35
|
],
|
|
31
36
|
"scripts": {
|
|
32
|
-
"build": "cross-env NODE_ENV=production rollup -c ../../rollup.config.
|
|
33
|
-
"prebuild": "rimraf lib/",
|
|
37
|
+
"build": "rimraf lib/ && cross-env NODE_ENV=production rollup -c ../../rollup.config.mjs",
|
|
34
38
|
"prepublishOnly": "cross-env ROLLUP_SHOULD_SOURCEMAP=false yarn build",
|
|
35
|
-
"test": "cross-env TS_NODE_FILES=true jest --config ../../jest.config.
|
|
39
|
+
"test": "cross-env TS_NODE_FILES=true NODE_OPTIONS=--experimental-vm-modules jest --config ../../jest.config.mjs --rootDir ."
|
|
36
40
|
},
|
|
37
41
|
"dependencies": {
|
|
38
|
-
"@yozora/ast": "^2.0.
|
|
39
|
-
"@yozora/character": "^2.0.
|
|
40
|
-
"@yozora/core-tokenizer": "^2.0.
|
|
42
|
+
"@yozora/ast": "^2.0.5",
|
|
43
|
+
"@yozora/character": "^2.0.5",
|
|
44
|
+
"@yozora/core-tokenizer": "^2.0.5"
|
|
41
45
|
},
|
|
42
|
-
"gitHead": "
|
|
46
|
+
"gitHead": "7ba3bab49fe65cf2f57082c0503af73da9356cf0"
|
|
43
47
|
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export * from './util/email'
|
|
2
|
+
export * from './util/uri'
|
|
3
|
+
export { match as autolinkMatch } from './match'
|
|
4
|
+
export { parse as autolinkParse } from './parse'
|
|
5
|
+
export { AutolinkTokenizer, AutolinkTokenizer as default } from './tokenizer'
|
|
6
|
+
export { uniqueName as AutolinkTokenizerName } from './types'
|
|
7
|
+
export type {
|
|
8
|
+
IThis as IAutolinkHookContext,
|
|
9
|
+
IToken as IAutolinkToken,
|
|
10
|
+
ITokenizerProps as IAutolinkTokenizerProps,
|
|
11
|
+
AutolinkContentType,
|
|
12
|
+
} from './types'
|
package/src/match.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { LinkType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import { AsciiCodePoint } from '@yozora/character'
|
|
4
|
+
import type {
|
|
5
|
+
IMatchInlineHookCreator,
|
|
6
|
+
IResultOfProcessSingleDelimiter,
|
|
7
|
+
} from '@yozora/core-tokenizer'
|
|
8
|
+
import { genFindDelimiter } from '@yozora/core-tokenizer'
|
|
9
|
+
import type { AutolinkContentType, IContentHelper, IDelimiter, IThis, IToken, T } from './types'
|
|
10
|
+
import { eatEmailAddress } from './util/email'
|
|
11
|
+
import { eatAbsoluteUri } from './util/uri'
|
|
12
|
+
|
|
13
|
+
/**
 * Content eaters tried, in order, right after an opening '<'.
 * The first eater that reports a valid match decides the content type.
 */
const helpers: ReadonlyArray<IContentHelper> = [
  { contentType: 'uri', eat: eatAbsoluteUri },
  { contentType: 'email', eat: eatEmailAddress },
]

/**
 * Match hook creator for autolinks.
 *
 * An autolink is an absolute URI or an email address wrapped in '<' and '>'.
 * It is parsed as a link whose label is the URI / email address itself.
 *
 * @see https://github.github.com/gfm/#autolink
 */
export const match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = function (api) {
  /**
   * Scan [startIndex, endIndex) for the first complete `<content>` delimiter.
   * Returns null when the range holds no autolink.
   */
  const locateDelimiter = (startIndex: number, endIndex: number): IDelimiter | null => {
    const points: ReadonlyArray<INodePoint> = api.getNodePoints()

    let cursor = startIndex
    while (cursor < endIndex) {
      if (points[cursor].codePoint !== AsciiCodePoint.OPEN_ANGLE) {
        cursor += 1
        continue
      }

      let stopIndex: number = endIndex
      let matchedType: AutolinkContentType | null = null
      for (const helper of helpers) {
        const outcome = helper.eat(points, cursor + 1, endIndex)
        if (outcome.valid) {
          matchedType = helper.contentType
          stopIndex = outcome.nextIndex
          break
        }
        // Remember the earliest position at which any eater gave up.
        stopIndex = Math.min(stopIndex, outcome.nextIndex)
      }

      // No eater matched: resume scanning from where the eaters gave up
      // (but always advance by at least one position).
      if (matchedType === null) {
        cursor = Math.max(cursor, stopIndex - 1) + 1
        continue
      }

      // The matched content must be immediately followed by '>'.
      const closed =
        stopIndex < endIndex && points[stopIndex].codePoint === AsciiCodePoint.CLOSE_ANGLE
      if (closed) {
        return {
          type: 'full',
          startIndex: cursor,
          endIndex: stopIndex + 1,
          contentType: matchedType,
        }
      }

      cursor = stopIndex
    }

    return null
  }

  /**
   * Turn a full delimiter into a link token whose children cover the text
   * between the angle brackets.
   */
  const toToken = (delimiter: IDelimiter): IResultOfProcessSingleDelimiter<T, IToken> => {
    const { startIndex, endIndex, contentType } = delimiter
    const token: IToken = {
      nodeType: LinkType,
      startIndex,
      endIndex,
      contentType,
      children: api.resolveFallbackTokens([], startIndex + 1, endIndex - 1),
    }
    return [token]
  }

  return {
    findDelimiter: () => genFindDelimiter<IDelimiter>(locateDelimiter),
    processSingleDelimiter: toToken,
  }
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Node } from '@yozora/ast'
|
|
2
|
+
import { LinkType } from '@yozora/ast'
|
|
3
|
+
import type { INodePoint } from '@yozora/character'
|
|
4
|
+
import { calcStringFromNodePoints } from '@yozora/character'
|
|
5
|
+
import type { IParseInlineHookCreator } from '@yozora/core-tokenizer'
|
|
6
|
+
import { encodeLinkDestination } from '@yozora/core-tokenizer'
|
|
7
|
+
import type { INode, IThis, IToken, T } from './types'
|
|
8
|
+
|
|
9
|
+
/**
 * Parse hook creator for autolinks: converts each autolink token into a
 * link node whose url is the (encoded) text between '<' and '>'.
 */
export const parse: IParseInlineHookCreator<T, IToken, INode, IThis> = function (api) {
  const toLinkNode = (token: IToken): INode => {
    const nodePoints: ReadonlyArray<INodePoint> = api.getNodePoints()

    // The raw text is used verbatim: backslash-escapes do not work inside autolink.
    const rawContent = calcStringFromNodePoints(
      nodePoints,
      token.startIndex + 1,
      token.endIndex - 1,
    )

    // Email autolinks get a 'mailto:' prefix.
    const destination = token.contentType === 'email' ? 'mailto:' + rawContent : rawContent
    const url = encodeLinkDestination(destination)
    const children: Node[] = api.parseInlineTokens(token.children)

    if (api.shouldReservePosition) {
      return { type: LinkType, position: api.calcPosition(token), url, children }
    }
    return { type: LinkType, url, children }
  }

  return {
    parse: tokens => tokens.map(token => toLinkNode(token)),
  }
}
|
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
IInlineTokenizer,
|
|
3
|
+
IMatchInlineHookCreator,
|
|
4
|
+
IParseInlineHookCreator,
|
|
5
|
+
} from '@yozora/core-tokenizer'
|
|
6
|
+
import { BaseInlineTokenizer, TokenizerPriority } from '@yozora/core-tokenizer'
|
|
7
|
+
import { match } from './match'
|
|
8
|
+
import { parse } from './parse'
|
|
9
|
+
import type { IDelimiter, INode, IThis, IToken, ITokenizerProps, T } from './types'
|
|
10
|
+
import { uniqueName } from './types'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Lexical Analyzer for Autolink.
|
|
14
|
+
* @see https://github.github.com/gfm/#autolink
|
|
15
|
+
*/
|
|
16
|
+
export class AutolinkTokenizer
  extends BaseInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
  implements IInlineTokenizer<T, IDelimiter, IToken, INode, IThis>
{
  /** Match-phase hook creator (finds `<...>` delimiters and builds tokens). */
  public override readonly match: IMatchInlineHookCreator<T, IDelimiter, IToken, IThis> = match

  /** Parse-phase hook creator (turns tokens into link nodes). */
  public override readonly parse: IParseInlineHookCreator<T, IToken, INode, IThis> = parse

  /**
   * @param props optional overrides; `name` falls back to the package's
   *              unique name and `priority` to `TokenizerPriority.ATOMIC`.
   */
  /* istanbul ignore next */
  constructor(props: ITokenizerProps = {}) {
    super({
      name: props.name ?? uniqueName,
      /**
       * Autolink has the same priority as inline-code.
       * @see https://github.github.com/gfm/#example-355
       * @see https://github.github.com/gfm/#example-356
       */
      priority: props.priority ?? TokenizerPriority.ATOMIC,
    })
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { Link, LinkType } from '@yozora/ast'
|
|
2
|
+
import type { INodePoint } from '@yozora/character'
|
|
3
|
+
import type {
|
|
4
|
+
IBaseInlineTokenizerProps,
|
|
5
|
+
IPartialYastInlineToken,
|
|
6
|
+
IResultOfRequiredEater,
|
|
7
|
+
ITokenizer,
|
|
8
|
+
IYastTokenDelimiter,
|
|
9
|
+
} from '@yozora/core-tokenizer'
|
|
10
|
+
|
|
11
|
+
// Content type of autolink
|
|
12
|
+
/** Kind of content wrapped by an autolink: an absolute URI or an email address. */
export type AutolinkContentType = 'uri' | 'email'

/** Node type produced by this tokenizer. */
export type T = LinkType

/** AST node produced by this tokenizer. */
export type INode = Link

/** Name used for the tokenizer when the caller does not supply one. */
export const uniqueName = '@yozora/tokenizer-autolink'

/** Token emitted during the match phase. */
export interface IToken extends IPartialYastInlineToken<T> {
  /**
   * Whether the autolink content is an absolute uri or an email address.
   */
  contentType: AutolinkContentType
}

/** Delimiter covering a whole autolink, from '<' through '>'. */
export interface IDelimiter extends IYastTokenDelimiter {
  type: 'full'
  /**
   * Whether the autolink content is an absolute uri or an email address.
   */
  contentType: AutolinkContentType
}

/** Hook context type used by the match / parse hook creators. */
export type IThis = ITokenizer

/** Constructor options of the tokenizer; every field is optional. */
export type ITokenizerProps = Partial<IBaseInlineTokenizerProps>

/**
 * Function that tries to consume autolink content from `nodePoints`
 * within [startIndex, endIndex).
 */
export type ContentEater = (
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
) => IResultOfRequiredEater

/** Pairs a content eater with the content type it recognizes. */
export interface IContentHelper {
  contentType: AutolinkContentType
  eat: ContentEater
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import {
|
|
3
|
+
AsciiCodePoint,
|
|
4
|
+
isAlphanumeric,
|
|
5
|
+
isAsciiDigitCharacter,
|
|
6
|
+
isAsciiLetter,
|
|
7
|
+
} from '@yozora/character'
|
|
8
|
+
import type { IResultOfOptionalEater, IResultOfRequiredEater } from '@yozora/core-tokenizer'
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* An email address, for these purposes, is anything that matches the
|
|
12
|
+
* non-normative regex from the HTML5 spec:
|
|
13
|
+
*
|
|
14
|
+
* /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}
|
|
15
|
+
* [a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
|
|
16
|
+
*
|
|
17
|
+
* @see https://github.github.com/gfm/#email-address
|
|
18
|
+
*/
|
|
19
|
+
export function eatEmailAddress(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  // Local part: /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+/
  let cursor = startIndex
  while (cursor < endIndex) {
    const c = nodePoints[cursor].codePoint
    if (!isAsciiLetter(c) && !isAsciiDigitCharacter(c) && !isEmailLocalPartSymbol(c)) break
    cursor += 1
  }

  // A non-empty local part must be followed by '@' and an alphanumeric
  // character that starts the first domain label.
  const reachesDomain =
    cursor !== startIndex &&
    cursor + 1 < endIndex &&
    nodePoints[cursor].codePoint === AsciiCodePoint.AT_SIGN &&
    isAlphanumeric(nodePoints[cursor + 1].codePoint)
  if (!reachesDomain) return { valid: false, nextIndex: cursor + 1 }

  // Remainder of the first domain label.
  cursor = eatAddressPart0(nodePoints, cursor + 2, endIndex)

  // Further labels: /(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*/
  while (cursor + 1 < endIndex && nodePoints[cursor].codePoint === AsciiCodePoint.DOT) {
    const labelStart = nodePoints[cursor + 1].codePoint
    if (!isAsciiLetter(labelStart) && !isAsciiDigitCharacter(labelStart)) break
    cursor = eatAddressPart0(nodePoints, cursor + 2, endIndex)
  }

  return { valid: true, nextIndex: cursor }
}

/**
 * Whether `c` is one of the punctuation characters accepted in the local
 * part of an email address: . ! # $ % & ' * + / = ? ^ _ ` { | } ~ -
 */
function isEmailLocalPartSymbol(c: number): boolean {
  switch (c) {
    case AsciiCodePoint.DOT:
    case AsciiCodePoint.EXCLAMATION_MARK:
    case AsciiCodePoint.NUMBER_SIGN:
    case AsciiCodePoint.DOLLAR_SIGN:
    case AsciiCodePoint.PERCENT_SIGN:
    case AsciiCodePoint.AMPERSAND:
    case AsciiCodePoint.SINGLE_QUOTE:
    case AsciiCodePoint.ASTERISK:
    case AsciiCodePoint.PLUS_SIGN:
    case AsciiCodePoint.SLASH:
    case AsciiCodePoint.EQUALS_SIGN:
    case AsciiCodePoint.QUESTION_MARK:
    case AsciiCodePoint.CARET:
    case AsciiCodePoint.UNDERSCORE:
    case AsciiCodePoint.BACKTICK:
    case AsciiCodePoint.OPEN_BRACE:
    case AsciiCodePoint.VERTICAL_SLASH:
    case AsciiCodePoint.CLOSE_BRACE:
    case AsciiCodePoint.TILDE:
    case AsciiCodePoint.MINUS_SIGN:
      return true
    default:
      return false
  }
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Match regex /(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/
|
|
80
|
+
*
|
|
81
|
+
*/
|
|
82
|
+
function eatAddressPart0(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfOptionalEater {
  // Look at no more than 62 code points, and never past endIndex.
  const limit = Math.min(endIndex, startIndex + 62)

  // Index of the last alphanumeric seen; -1 while none has been seen.
  let lastAlnumIndex = -1
  let cursor = startIndex
  while (cursor < limit) {
    const c = nodePoints[cursor].codePoint
    if (isAsciiLetter(c) || isAsciiDigitCharacter(c)) {
      lastAlnumIndex = cursor
    } else if (c !== AsciiCodePoint.MINUS_SIGN) {
      break
    }
    cursor += 1
  }

  // The match must end on an alphanumeric, so trailing hyphens are dropped;
  // when nothing matched the position is left unchanged.
  return lastAlnumIndex < startIndex ? startIndex : lastAlnumIndex + 1
}
|
package/src/util/uri.ts
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import type { INodePoint } from '@yozora/character'
|
|
2
|
+
import {
|
|
3
|
+
AsciiCodePoint,
|
|
4
|
+
isAlphanumeric,
|
|
5
|
+
isAsciiCharacter,
|
|
6
|
+
isAsciiControlCharacter,
|
|
7
|
+
isAsciiLetter,
|
|
8
|
+
isWhitespaceCharacter,
|
|
9
|
+
} from '@yozora/character'
|
|
10
|
+
import type { IResultOfRequiredEater } from '@yozora/core-tokenizer'
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
 * Try to match an autolink absolute URI starting strictly at the given `startIndex`.
|
|
14
|
+
*
|
|
15
|
+
* An absolute URI, for these purposes, consists of a scheme followed by a
|
|
16
|
+
* colon (:) followed by zero or more characters other than ASCII whitespace
|
|
17
|
+
* and control characters, `<`, and `>`. If the URI includes these characters,
|
|
18
|
+
* they must be percent-encoded (e.g. %20 for a space).
|
|
19
|
+
*
|
|
20
|
+
* @see https://github.github.com/gfm/#absolute-uri
|
|
21
|
+
*/
|
|
22
|
+
// NOTE(review): code points rejected by `isAsciiCharacter` also end the URI
// here, which looks stricter than the spec wording (it only excludes ASCII
// whitespace, control characters, '<' and '>') — confirm this is intended.
export function eatAbsoluteUri(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  const { valid: hasScheme, nextIndex: schemeEnd } = eatAutolinkSchema(
    nodePoints,
    startIndex,
    endIndex,
  )

  // The scheme must be valid and immediately followed by ':'.
  const hasColon =
    hasScheme && schemeEnd < endIndex && nodePoints[schemeEnd].codePoint === AsciiCodePoint.COLON
  if (!hasColon) return { valid: false, nextIndex: schemeEnd }

  // Consume the rest of the URI up to the first disallowed code point.
  let cursor = schemeEnd + 1
  while (cursor < endIndex) {
    const c = nodePoints[cursor].codePoint
    const allowed =
      isAsciiCharacter(c) &&
      !isWhitespaceCharacter(c) &&
      !isAsciiControlCharacter(c) &&
      c !== AsciiCodePoint.OPEN_ANGLE &&
      c !== AsciiCodePoint.CLOSE_ANGLE
    if (!allowed) break
    cursor += 1
  }

  return { valid: true, nextIndex: cursor }
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
 * Try to match an autolink scheme starting strictly at the given `startIndex`.
|
|
53
|
+
*
|
|
54
|
+
* A scheme is any sequence of 2–32 characters beginning with an ASCII letter
|
|
55
|
+
* and followed by any combination of ASCII letters, digits, or the symbols
|
|
56
|
+
* plus (`+`), period (`.`), or hyphen (`-`).
|
|
57
|
+
*
|
|
58
|
+
* @see https://github.github.com/gfm/#scheme
|
|
59
|
+
*/
|
|
60
|
+
// Returns `{ valid, nextIndex }`. On success `nextIndex` is the index right
// after the scheme (where ':' is expected); on failure it is one past the
// position at which matching stopped.
export function eatAutolinkSchema(
  nodePoints: ReadonlyArray<INodePoint>,
  startIndex: number,
  endIndex: number,
): IResultOfRequiredEater {
  // Guard against an empty range: callers scanning for '<' pass `i + 1`,
  // which equals `endIndex` (and may be past the end of `nodePoints`) when
  // '<' is the last code point. Reading `nodePoints[startIndex]` there would
  // throw or inspect a code point outside the requested range.
  if (startIndex >= endIndex) return { valid: false, nextIndex: startIndex + 1 }

  // A scheme must begin with an ASCII letter.
  if (!isAsciiLetter(nodePoints[startIndex].codePoint)) {
    return { valid: false, nextIndex: startIndex + 1 }
  }

  // Consume the following letters, digits, '+', '.' and '-'.
  let i = startIndex + 1
  for (; i < endIndex; ++i) {
    const d = nodePoints[i].codePoint
    const isSchemeChar =
      isAlphanumeric(d) ||
      d === AsciiCodePoint.PLUS_SIGN ||
      d === AsciiCodePoint.DOT ||
      d === AsciiCodePoint.MINUS_SIGN
    if (!isSchemeChar) break
  }

  // A scheme is 2–32 characters long.
  const count = i - startIndex
  if (count < 2 || count > 32) return { valid: false, nextIndex: i + 1 }
  return { valid: true, nextIndex: i }
}
|