extra-parser 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +4 -3
- package/src/consume-node.ts +22 -0
- package/src/consume-token.ts +19 -0
- package/src/create-binary-operator-expression-node-pattern.ts +50 -0
- package/src/create-grouped-expression-node-pattern.ts +30 -0
- package/src/create-token-pattern-from-regexp.ts +32 -0
- package/src/create-unary-operator-expression-node-pattern.ts +44 -0
- package/src/create-value-expression-node-pattern.ts +36 -0
- package/src/index.ts +20 -0
- package/src/match-any-of.ts +20 -0
- package/src/match-repetitions.ts +59 -0
- package/src/match-sequence.ts +180 -0
- package/src/parse.ts +26 -0
- package/src/tokenize.ts +23 -0
- package/src/types.ts +65 -0
package/README.md
CHANGED
|
@@ -118,7 +118,7 @@ function consumeToken<Token extends IToken = IToken>(
|
|
|
118
118
|
): Token | Falsy
|
|
119
119
|
```
|
|
120
120
|
|
|
121
|
-
|
|
121
|
+
### matchAnyOf
|
|
122
122
|
```ts
|
|
123
123
|
function matchAnyOf<
|
|
124
124
|
Token extends IToken = IToken
|
|
@@ -129,7 +129,7 @@ function matchAnyOf<
|
|
|
129
129
|
): Promise<INodePatternMatch<Node> | Falsy>
|
|
130
130
|
```
|
|
131
131
|
|
|
132
|
-
|
|
132
|
+
### matchSequence
|
|
133
133
|
```ts
|
|
134
134
|
function matchSequence<
|
|
135
135
|
Sequence extends ReadonlyArray<Token | Node>
|
|
@@ -141,7 +141,7 @@ function matchSequence<
|
|
|
141
141
|
): Promise<MapSequenceToMatches<Sequence, Token, Node> | Falsy>
|
|
142
142
|
```
|
|
143
143
|
|
|
144
|
-
|
|
144
|
+
### matchRepetitions
|
|
145
145
|
```ts
|
|
146
146
|
function matchRepetitions<
|
|
147
147
|
Sequence extends ReadonlyArray<Token | Node>
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "extra-parser",
|
|
3
|
-
"version": "0.6.2",
|
|
3
|
+
"version": "0.6.4",
|
|
4
4
|
"description": "A functional parser toolkit",
|
|
5
5
|
"keywords": [],
|
|
6
6
|
"files": [
|
|
7
|
-
"lib"
|
|
7
|
+
"lib",
|
|
8
|
+
"src"
|
|
8
9
|
],
|
|
9
10
|
"main": "lib/index.js",
|
|
10
11
|
"types": "lib/index.d.ts",
|
|
@@ -31,7 +32,6 @@
|
|
|
31
32
|
}
|
|
32
33
|
},
|
|
33
34
|
"devDependencies": {
|
|
34
|
-
"@blackglory/jest-matchers": "^0.5.0",
|
|
35
35
|
"@commitlint/cli": "^17.2.0",
|
|
36
36
|
"@commitlint/config-conventional": "^17.2.0",
|
|
37
37
|
"@types/jest": "^29.2.2",
|
|
@@ -51,6 +51,7 @@
|
|
|
51
51
|
},
|
|
52
52
|
"dependencies": {
|
|
53
53
|
"@blackglory/prelude": "^0.1.8",
|
|
54
|
+
"hotypes": "^0.5.1",
|
|
54
55
|
"iterable-operator": "^2.3.0"
|
|
55
56
|
}
|
|
56
57
|
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Falsy, isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INode, INodePattern, INodePatternMatch } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Tries to match a node at the head of `tokens`. When the pattern produces a
 * match, the number of tokens reported by `match.consumed` is removed from the
 * front of `tokens`.
 *
 * @param nodePattern Pattern invoked with `tokens`.
 * @param tokens Modified in place when the match succeeds.
 * @returns The match returned by `nodePattern`, or `undefined` when the pattern
 * result is falsy.
 */
export async function consumeNode<
  Token extends IToken = IToken
, Node extends INode = INode
>(
  nodePattern: INodePattern<Token, Node>
, tokens: Token[]
): Promise<INodePatternMatch<Node> | Falsy> {
  const result = await nodePattern(tokens)
  if (!isntFalsy(result)) return

  // Drop exactly the tokens the pattern says it used.
  tokens.splice(0, result.consumed)
  return result
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Falsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Tries to match a token at the head of `tokens`. When the first token has the
 * requested type it is removed from the array and returned.
 *
 * @param tokenType Token type the first token must have.
 * @param tokens Modified in place when the match succeeds.
 * @returns The consumed token, or `undefined` when `tokens` is empty or the
 * first token has a different type.
 */
export function consumeToken<Token extends IToken = IToken>(
  tokenType: string
, tokens: Token[]
): Token | Falsy {
  // Annotated with `undefined` because `tokens` may be empty; keeping the
  // element type as `Token` avoids a cast on return.
  const firstToken: Token | undefined = tokens[0]

  if (firstToken && firstToken.tokenType === tokenType) {
    tokens.shift()
    return firstToken
  }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INode, INodePattern } from './types'
|
|
3
|
+
import { matchSequence } from './match-sequence'
|
|
4
|
+
|
|
5
|
+
/**
 * Node shape for an expression with a left operand and a right operand.
 */
export interface IBinaryOperatorExpressionNode<
  NodeType extends string
, LeftNode extends INode
, RightNode extends INode
> extends INode {
  /** Discriminating node type. */
  nodeType: NodeType
  /** Node matched on the left side of the operator. */
  left: LeftNode
  /** Node matched on the right side of the operator. */
  right: RightNode
}
|
|
14
|
+
|
|
15
|
+
/**
 * Creates a node pattern for `left <centerToken> right` expressions.
 *
 * The returned pattern matches the sequence
 * `[leftNodePattern, centerTokenType, rightNodePattern]` via `matchSequence`.
 * On success it produces a node `{ nodeType, left, right }` and reports the
 * tokens consumed by both operands plus one for the operator token.
 *
 * @returns A node pattern; it resolves to `undefined` when the sequence does
 * not match.
 */
export function createBinaryOperatorExpressionNodePattern<
  Token extends IToken
, Node extends IBinaryOperatorExpressionNode<string, LeftNode, RightNode>
, LeftNode extends INode
, RightNode extends INode
>({ nodeType, centerTokenType, rightNodePattern, leftNodePattern }: {
  nodeType: Node['nodeType']
  centerTokenType: string
  leftNodePattern: INodePattern<Token, LeftNode>
  rightNodePattern: INodePattern<Token, RightNode>
}): INodePattern<
  Token
, IBinaryOperatorExpressionNode<Node['nodeType'], Node['left'], Node['right']>
> {
  return async tokens => {
    const matches = await matchSequence<[INode, IToken, INode]>(
      [
        leftNodePattern as INodePattern<IToken, LeftNode>
      , centerTokenType
      , rightNodePattern as INodePattern<IToken, RightNode>
      ]
    , tokens
    )
    if (isntFalsy(matches)) {
      // The operator token itself is not needed, only its presence.
      const [leftMatch, , rightMatch] = matches
      return {
        consumed: leftMatch.consumed + 1 + rightMatch.consumed
      , node: {
          nodeType
        , left: leftMatch.node as Node['left']
        , right: rightMatch.node as Node['right']
        }
      }
    }
  }
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INode, INodePattern } from './types'
|
|
3
|
+
import { matchSequence } from './match-sequence'
|
|
4
|
+
|
|
5
|
+
/**
 * Creates a node pattern for a node wrapped between two delimiter tokens,
 * i.e. the sequence `[leftTokenType, centerNodePattern, rightTokenType]`.
 *
 * On success the inner node is returned unchanged and the consumed count is
 * the inner match plus the two delimiter tokens.
 *
 * @returns A node pattern; it resolves to `undefined` when the sequence does
 * not match.
 */
export function createGroupedExpressionNodePattern<
  Token extends IToken
, CenterNode extends INode
>({ leftTokenType, rightTokenType, centerNodePattern }: {
  leftTokenType: string
  rightTokenType: string
  centerNodePattern: INodePattern<Token, CenterNode>
}): INodePattern<Token, CenterNode> {
  return async tokens => {
    const matches = await matchSequence<[IToken, INode, IToken]>(
      [
        leftTokenType
      , centerNodePattern as INodePattern<IToken, CenterNode>
      , rightTokenType
      ]
    , tokens
    )
    if (isntFalsy(matches)) {
      // Only the inner match is used; the delimiters just have to be present.
      const [, nodeMatch] = matches
      return {
        consumed: 1 + nodeMatch.consumed + 1
      , node: nodeMatch.node as CenterNode
      }
    }
  }
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { Falsy, isntNull } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, ITokenPattern, ITokenPatternMatch } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Builds a token pattern backed by a regular expression.
 *
 * The expression is first passed through `convertToStartsWithRegExp`. The
 * returned pattern runs it against the text and, on a match, reports the
 * matched text as the token value and its length as the consumed count.
 *
 * @param tokenType Type assigned to the produced tokens.
 * @param regExp Expression describing the token text.
 * @returns A token pattern that yields `false` when the expression does not match.
 */
export function createTokenPatternFromRegExp<Token extends IToken>(
  tokenType: Token['tokenType']
, regExp: RegExp
): ITokenPattern<IToken> {
  const anchoredRegExp = convertToStartsWithRegExp(regExp)

  return (text: string): ITokenPatternMatch<IToken> | Falsy => {
    const execResult = anchoredRegExp.exec(text)
    if (!isntNull(execResult)) return false

    // Index 0 of an exec result is the full matched text.
    const value = execResult[0]
    return {
      consumed: value.length
    , token: { tokenType, value }
    }
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Returns a copy of `re` that can only match at the start of the input.
 *
 * - The source is wrapped in a non-capturing group before `^` is prepended,
 *   so the anchor applies to every alternative (`a|b` becomes `^(?:a|b)`
 *   rather than `^a|b`, which would let `b` match anywhere). A non-capturing
 *   group does not shift capture-group numbering.
 * - The `g` and `y` flags are dropped: they make `exec` resume from
 *   `lastIndex`, which would carry state from one call into the next on a
 *   different text.
 *
 * @param re Expression to anchor; it is not modified.
 */
function convertToStartsWithRegExp(re: RegExp): RegExp {
  const statelessFlags = re.flags.replace(/[gy]/g, '')
  return new RegExp(`^(?:${re.source})`, statelessFlags)
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INode, INodePattern } from './types'
|
|
3
|
+
import { matchSequence } from './match-sequence'
|
|
4
|
+
|
|
5
|
+
/**
 * Node shape for an expression with a single operand on the right.
 */
export interface IUnaryOperatorExpressionNode<
  NodeType extends string
, RightNode extends INode
> extends INode {
  /** Discriminating node type. */
  nodeType: NodeType
  /** Node matched on the right side of the operator. */
  right: RightNode
}
|
|
12
|
+
|
|
13
|
+
/**
 * Creates a node pattern for `<leftToken> right` expressions, i.e. the
 * sequence `[leftTokenType, rightNodePattern]`.
 *
 * On success it produces a node `{ nodeType, right }` and reports the tokens
 * consumed by the operand plus one for the operator token.
 *
 * @returns A node pattern; it resolves to `undefined` when the sequence does
 * not match.
 */
export function createUnaryOperatorExpressionNodePattern<
  Token extends IToken
, Node extends IUnaryOperatorExpressionNode<string, RightNode>
, RightNode extends INode
>({ leftTokenType, nodeType, rightNodePattern }: {
  nodeType: Node['nodeType']
  leftTokenType: string
  rightNodePattern: INodePattern<Token, RightNode>
}): INodePattern<
  Token
, IUnaryOperatorExpressionNode<Node['nodeType'], Node['right']>
> {
  return async tokens => {
    const matches = await matchSequence<[IToken, INode]>(
      [
        leftTokenType
      , rightNodePattern as INodePattern<IToken, RightNode>
      ]
    , tokens
    )
    if (isntFalsy(matches)) {
      // The operator token only has to be present; its value is unused.
      const [, rightMatch] = matches
      return {
        consumed: 1 + rightMatch.consumed
      , node: {
          nodeType: nodeType
        , right: rightMatch.node as RightNode
        }
      }
    }
  }
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { isntFalsy, toArray } from '@blackglory/prelude'
|
|
2
|
+
import { consumeToken } from './consume-token'
|
|
3
|
+
import { IToken, INode, INodePattern } from './types'
|
|
4
|
+
|
|
5
|
+
/**
 * Node shape for an expression that carries a single value.
 */
export interface IValueExpressionNode<
  NodeType extends string
, Value
> extends INode {
  /** Discriminating node type. */
  nodeType: NodeType
  /** Value carried by the node. */
  value: Value
}
|
|
12
|
+
|
|
13
|
+
/**
 * Creates a node pattern that turns a single token into a value node.
 *
 * The returned pattern looks only at the first token. When its type equals
 * `valueTokenType`, the result is a node `{ nodeType, value }` where `value`
 * is `transformValue(token.value)`, with a consumed count of 1.
 *
 * @returns A node pattern; it returns `undefined` when there is no first
 * token or its type differs.
 */
export function createValueExpressionNodePattern<
  Token extends IToken
, Node extends IValueExpressionNode<string, Value>
, Value
>({ valueTokenType, nodeType, transformValue }: {
  nodeType: Node['nodeType']
  valueTokenType: string
  transformValue: (value: string) => Value
}): INodePattern<Token, IValueExpressionNode<Node['nodeType'], Node['value']>> {
  return tokens => {
    // Peek at the head instead of copying the whole token array: only the
    // first token is ever inspected and the input must stay untouched.
    const token: Token | undefined = tokens[0]

    if (token && token.tokenType === valueTokenType) {
      return {
        consumed: 1
      , node: {
          nodeType
        , value: transformValue(token.value)
        }
      }
    }
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
export {
|
|
2
|
+
IToken
|
|
3
|
+
, ITokenPattern
|
|
4
|
+
, ITokenPatternMatch
|
|
5
|
+
, INode
|
|
6
|
+
, INodePattern
|
|
7
|
+
, INodePatternMatch
|
|
8
|
+
} from './types'
|
|
9
|
+
export * from './tokenize'
|
|
10
|
+
export * from './parse'
|
|
11
|
+
export * from './consume-token'
|
|
12
|
+
export * from './consume-node'
|
|
13
|
+
export { matchSequence } from './match-sequence'
|
|
14
|
+
export * from './match-repetitions'
|
|
15
|
+
export * from './match-any-of'
|
|
16
|
+
export * from './create-token-pattern-from-regexp'
|
|
17
|
+
export * from './create-unary-operator-expression-node-pattern'
|
|
18
|
+
export * from './create-binary-operator-expression-node-pattern'
|
|
19
|
+
export * from './create-grouped-expression-node-pattern'
|
|
20
|
+
export * from './create-value-expression-node-pattern'
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Falsy, isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INode, INodePattern, INodePatternMatch } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Tries the patterns one after another, in order, against the same tokens and
 * returns the first result that is truthy.
 *
 * @returns The first truthy match, or `undefined` when every pattern yields a
 * falsy result.
 */
export async function matchAnyOf<
  Token extends IToken = IToken
, Node extends INode = INode
>(
  nodePatterns: ReadonlyArray<INodePattern<Token, Node>>
, tokens: ReadonlyArray<Token>
): Promise<INodePatternMatch<Node> | Falsy> {
  for (const nodePattern of nodePatterns) {
    const result = await nodePattern(tokens)
    if (isntFalsy(result)) return result
  }

  return undefined
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { assert, Falsy, isntFalsy, toArray } from '@blackglory/prelude'
|
|
2
|
+
import { Flatten } from 'hotypes'
|
|
3
|
+
import { IToken, INode, MapSequenceToPatterns, MapSequenceToMatches } from './types'
|
|
4
|
+
import { matchSequence } from './match-sequence'
|
|
5
|
+
|
|
6
|
+
/**
 * Matches `patterns` repeatedly against the head of `tokens`.
 *
 * Each repetition is matched with `matchSequence` against a private copy of
 * the tokens; after a successful repetition the tokens it consumed are removed
 * from that copy before the next attempt. The caller's array is not modified.
 *
 * The number of tokens removed is the sum over the repetition's matches:
 * a node match contributes its `consumed` count, a token match contributes 1.
 * (Using the number of matches instead would under-count whenever a node
 * match spans more than one token.)
 *
 * @param patterns Sequence of token types and node patterns for one repetition.
 * @param tokens Tokens to match against.
 * @param options `minimumRepetitions` (default 1) and `maximumRepetitions`
 * (default `Infinity`); both are validated with `assert`.
 * @returns The matches of all repetitions flattened into one array, or
 * `undefined` when fewer than `minimumRepetitions` repetitions match.
 */
export async function matchRepetitions<
  Sequence extends ReadonlyArray<Token | Node>
, Token extends IToken = IToken
, Node extends INode = INode
>(
  patterns: MapSequenceToPatterns<Sequence, Token, Node>
, tokens: ReadonlyArray<Token>
, {
    minimumRepetitions = 1
  , maximumRepetitions = Infinity
  }: {
    minimumRepetitions?: number
    maximumRepetitions?: number
  } = {}
): Promise<Flatten<Array<MapSequenceToMatches<Sequence, Token, Node>>> | Falsy> {
  assert(Number.isInteger(minimumRepetitions), 'The minimum repetiions must be an integer')
  assert(
    minimumRepetitions >= 0
  , 'The minimum repetitions must be greater than or equal to 0'
  )
  assert(
    Number.isInteger(maximumRepetitions) || maximumRepetitions === Infinity
  , 'The maxmium repetiions must be an integer or an Infinity'
  )
  assert(
    maximumRepetitions >= minimumRepetitions
  , 'The maximum repetitions must be greater than or equal to the minimum repetitions'
  )

  const results: Array<MapSequenceToMatches<Sequence, Token, Node>> = []
  const mutableTokens = toArray(tokens)

  for (let i = 0; i < maximumRepetitions; i++) {
    const matches = await matchSequence(patterns, mutableTokens)
    if (isntFalsy(matches)) {
      results.push(matches)

      // Advance by the tokens actually consumed, not by the match count.
      const consumed = matches
        .map((match: Token | { consumed: number }) => {
          return 'consumed' in match
               ? match.consumed
               : 1
        })
        .reduce((acc, cur) => acc + cur, 0)
      mutableTokens.splice(0, consumed)
    } else if (i < minimumRepetitions) {
      // A required repetition is missing: the whole match fails.
      return
    } else {
      // Optional repetitions simply stop at the first failure.
      break
    }
  }

  return results.flat() as Flatten<Array<MapSequenceToMatches<Sequence, Token, Node>>>
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import {
|
|
2
|
+
NonEmptyArray
|
|
3
|
+
, Falsy
|
|
4
|
+
, isntFalsy
|
|
5
|
+
, isString
|
|
6
|
+
, isFunction
|
|
7
|
+
, toArray
|
|
8
|
+
} from '@blackglory/prelude'
|
|
9
|
+
import { findAllIndexes } from 'iterable-operator'
|
|
10
|
+
import {
|
|
11
|
+
IToken
|
|
12
|
+
, INode
|
|
13
|
+
, INodePattern
|
|
14
|
+
, INodePatternMatch
|
|
15
|
+
, MapSequenceToPatterns
|
|
16
|
+
, MapSequenceToMatches
|
|
17
|
+
} from './types'
|
|
18
|
+
import { consumeToken } from './consume-token'
|
|
19
|
+
import { consumeNode } from './consume-node'
|
|
20
|
+
|
|
21
|
+
/**
 * Matches a sequence of token types and node patterns against `tokens`.
 *
 * The patterns are split into the following sub-patterns for processing:
 * - `[TokenType, ...TokenType[]]`
 * - `[NodePattern, ...NodePattern[]]`:
 *   depending on how the NodePattern is defined, this sub-pattern may fall
 *   into an infinite loop.
 * - `[NodePattern, TokenType]`:
 *   this sub-pattern suits rules such as binary or ternary operators.
 *   Under the hood it matches the TokenType first, to keep the NodePattern
 *   from falling into an infinite loop while matching.
 *
 * The caller's `tokens` array is never modified; consumption happens on copies.
 *
 * @returns One match per pattern (a token for a token type, a node match for
 * a node pattern), or `undefined` when the sequence does not match.
 */
export async function matchSequence<
  Sequence extends ReadonlyArray<Token | Node>
, Token extends IToken = IToken
, Node extends INode = INode
>(
  patterns: MapSequenceToPatterns<Sequence, Token, Node>
, tokens: ReadonlyArray<Token>
): Promise<MapSequenceToMatches<Sequence, Token, Node> | Falsy> {
  // Case 1: token types only — consume them one by one.
  if (isTokenTypes(patterns)) {
    const rest = toArray(tokens)
    const found: Array<Token> = []

    for (const tokenType of patterns) {
      const token = consumeToken(tokenType, rest)
      if (!isntFalsy(token)) return
      found.push(token)
    }

    return found as MapSequenceToMatches<Sequence, Token, Node>
  }

  // Case 2: node patterns only — consume them one by one.
  if (isNodePatterns<Token, Node>(patterns)) {
    const rest = toArray(tokens)
    const found: Array<INodePatternMatch<Node>> = []

    for (const nodePattern of patterns) {
      const nodeMatch = await consumeNode(nodePattern, rest)
      if (!isntFalsy(nodeMatch)) return
      found.push(nodeMatch)
    }

    return found as MapSequenceToMatches<Sequence, Token, Node>
  }

  // Case 3: a node pattern followed by a token type. Every position of a
  // token with that type is tried; the node pattern only sees the tokens
  // before it and must consume all of them.
  if (isNodePatternNodeType<Token, Node>(patterns)) {
    const [nodePattern, tokenType] = patterns
    const candidateIndexes = findAllIndexes(
      tokens
    , token => token.tokenType === tokenType
    )

    for (const index of candidateIndexes) {
      const head = await nodePattern(tokens.slice(0, index))
      if (isntFalsy(head) && head.consumed === index) {
        const pair: [INodePatternMatch<Node>, Token] = [head, tokens[index]]
        return pair as MapSequenceToMatches<Sequence>
      }
    }

    return
  }

  // Case 4: mixed sequence — split it into the sub-patterns above and match
  // them in order, dropping the consumed tokens after each one.
  const collected: Array<INodePatternMatch<Node> | Token> = []
  const rest = toArray(tokens)

  for (const subPatterns of splitPatterns(patterns)) {
    const subMatches = await matchSequence(
      subPatterns as MapSequenceToPatterns<Sequence, Token, Node>
    , rest
    )
    if (!isntFalsy(subMatches)) return

    // A node match reports its own count; a token match counts as one.
    const consumed = subMatches
      .map((match: Token | INodePatternMatch<Node>) => {
        return 'consumed' in match
             ? match.consumed
             : 1
      })
      .reduce((sum, count) => sum + count, 0)

    rest.splice(0, consumed)
    collected.push(...subMatches)
  }

  return collected as MapSequenceToMatches<Sequence>
}
|
|
110
|
+
|
|
111
|
+
/**
 * The sub-pattern shapes produced by `splitPatterns`:
 * a node pattern followed by a token type, a non-empty run of token types,
 * or a non-empty run of node patterns.
 */
type SubPatterns<Token extends IToken = IToken, Node extends INode = INode> =
| [INodePattern<Token, Node>, string]
| NonEmptyArray<string>
| NonEmptyArray<INodePattern<Token, Node>>
|
|
115
|
+
|
|
116
|
+
/**
 * Splits a mixed sequence of token types and node patterns into sub-patterns,
 * each as long as possible:
 *
 * - a run of token types is yielded up to (not including) the next node pattern;
 * - a run of node patterns with no token type after it is yielded as a whole;
 * - a single node pattern directly followed by a token type is yielded as the
 *   pair `[NodePattern, TokenType]`;
 * - when several node patterns precede the next token type, all but the last
 *   are yielded as a run, so that the last one is paired with the token type
 *   on the next iteration. Yielding them together with the token type would
 *   produce a shape that is none of the `SubPatterns` forms.
 *
 * @throws Error `Unknown patterns` when an element is neither a token type
 * nor a node pattern.
 */
export function* splitPatterns<
  Sequence extends ReadonlyArray<Token | Node>
, Token extends IToken = IToken
, Node extends INode = INode
>(
  patterns: MapSequenceToPatterns<Sequence, Token, Node>
): IterableIterator<SubPatterns<Token, Node>> {
  const mutablePatterns: Array<INodePattern<Token, Node> | string> = toArray(patterns)

  while (mutablePatterns.length > 0) {
    if (isTokenType(mutablePatterns[0])) {
      const indexOfNodePattern = mutablePatterns.findIndex(x => isNodePattern(x))
      if (indexOfNodePattern === -1) {
        yield mutablePatterns.splice(0) as NonEmptyArray<string>
      } else {
        yield mutablePatterns.splice(0, indexOfNodePattern) as NonEmptyArray<string>
      }
    } else if (isNodePattern(mutablePatterns[0])) {
      const indexOfToken = mutablePatterns.findIndex(x => isTokenType(x))
      if (indexOfToken === -1) {
        yield mutablePatterns.splice(0) as NonEmptyArray<
          INodePattern<Token, Node>
        >
      } else if (indexOfToken === 1) {
        yield mutablePatterns.splice(0, 2) as [
          INodePattern<Token, Node>
        , string
        ]
      } else {
        // Leave the last node pattern in place; it pairs with the token type
        // on the next pass through the loop.
        yield mutablePatterns.splice(0, indexOfToken - 1) as NonEmptyArray<
          INodePattern<Token, Node>
        >
      }
    } else {
      throw new Error('Unknown patterns')
    }
  }
}
|
|
153
|
+
|
|
154
|
+
/** Returns whether every element of `arr` is a token type. */
function isTokenTypes(arr: ReadonlyArray<unknown>): arr is ReadonlyArray<string> {
  return arr.every(element => isTokenType(element))
}
|
|
157
|
+
|
|
158
|
+
/** A token type is represented by a string. */
function isTokenType(val: unknown): val is string {
  const valueIsString = isString(val)
  return valueIsString
}
|
|
161
|
+
|
|
162
|
+
/** Returns whether every element of `arr` is a node pattern. */
function isNodePatterns<Token extends IToken = IToken, Node extends INode = INode>(
  arr: ReadonlyArray<unknown>
): arr is ReadonlyArray<INodePattern<Token, Node>> {
  return arr.every(element => isNodePattern(element))
}
|
|
167
|
+
|
|
168
|
+
/** A node pattern is represented by a function. */
function isNodePattern<Token extends IToken = IToken, Node extends INode = INode>(
  val: unknown
): val is INodePattern<Token, Node> {
  const valueIsFunction = isFunction(val)
  return valueIsFunction
}
|
|
173
|
+
|
|
174
|
+
/**
 * Returns whether `arr` is exactly a node pattern followed by a token type.
 */
function isNodePatternNodeType<Token extends IToken = IToken, Node extends INode = INode>(
  arr: ReadonlyArray<unknown>
): arr is readonly [INodePattern<Token, Node>, string] {
  if (arr.length !== 2) return false

  const [first, second] = arr
  return isNodePattern(first) && isTokenType(second)
}
|
package/src/parse.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { IToken, INodePattern, INode } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Parses `tokens` into nodes.
 *
 * At each position the patterns are tried in order against the remaining
 * tokens; the first usable match is yielded and the position advances by its
 * `consumed` count. `tokens` itself is not modified.
 *
 * A match that does not consume at least one token is ignored: accepting it
 * would leave the position unchanged and repeat the same match forever.
 *
 * @throws Error `Unparseable tokens: ...` when no pattern produces a usable
 * match for the remaining tokens.
 */
export async function* parse<
  Token extends IToken = IToken
, Node extends INode = INode
>(
  patterns: Array<INodePattern<Token, Node>>
, tokens: Token[]
): AsyncIterableIterator<Node> {
  let i = 0
  loop: while (i < tokens.length) {
    const remainingTokens = tokens.slice(i)

    for (const pattern of patterns) {
      const result = await pattern(remainingTokens)
      if (isntFalsy(result) && result.consumed > 0) {
        yield result.node
        i += result.consumed
        continue loop
      }
    }

    throw new Error(`Unparseable tokens: ${JSON.stringify(remainingTokens)}`)
  }
}
|
package/src/tokenize.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { isntFalsy } from '@blackglory/prelude'
|
|
2
|
+
import { ITokenPattern, IToken } from './types'
|
|
3
|
+
|
|
4
|
+
/**
 * Splits `text` into tokens.
 *
 * At each position the patterns are tried in order against the remaining
 * text; the first usable match is yielded and the position advances by its
 * `consumed` count.
 *
 * A match that does not consume at least one character (for example a regular
 * expression that matches the empty string) is ignored: accepting it would
 * leave the position unchanged and yield the same token forever.
 *
 * @throws Error `Unknown text: ...` when no pattern produces a usable match
 * for the remaining text.
 */
export async function* tokenize<Token extends IToken = IToken>(
  patterns: Array<ITokenPattern<Token>>
, text: string
): AsyncIterableIterator<Token> {
  let i = 0
  loop: while (i < text.length) {
    const remainingText = text.slice(i)

    for (const pattern of patterns) {
      const result = await pattern(remainingText)
      if (isntFalsy(result) && result.consumed > 0) {
        yield result.token
        i += result.consumed
        continue loop
      }
    }

    throw new Error(`Unknown text: ${JSON.stringify(remainingText)}`)
  }
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { Falsy, Awaitable } from '@blackglory/prelude'
|
|
2
|
+
|
|
3
|
+
/** A token: a type tag plus the text it carries. */
export interface IToken {
  /** Type tag compared against token types in patterns. */
  tokenType: string
  /** Text carried by the token. */
  value: string
}
|
|
7
|
+
|
|
8
|
+
/** Base shape of every node: a type tag. */
export interface INode {
  /** Type tag of the node. */
  nodeType: string
}
|
|
11
|
+
|
|
12
|
+
/** Successful result of a token pattern. */
export interface ITokenPatternMatch<Token extends IToken> {
  /** Amount of input the match used up. */
  consumed: number
  /** Token produced by the match. */
  token: Token
}
|
|
16
|
+
|
|
17
|
+
/** Successful result of a node pattern. */
export interface INodePatternMatch<Node extends INode> {
  /** Number of tokens the match used up. */
  consumed: number
  /** Node produced by the match. */
  node: Node
}
|
|
21
|
+
|
|
22
|
+
/**
 * A token pattern: a function from text to a token match, or a falsy value
 * when the text does not match. The result may be returned directly or
 * asynchronously.
 */
export interface ITokenPattern<Token extends IToken = IToken> {
  (text: string): Awaitable<ITokenPatternMatch<Token> | Falsy>
}
|
|
25
|
+
|
|
26
|
+
/**
 * A node pattern: a function from a read-only token list to a node match, or
 * a falsy value when the tokens do not match. The result may be returned
 * directly or asynchronously.
 */
export interface INodePattern<
  Token extends IToken = IToken
, Node extends INode = INode
> {
  (tokens: ReadonlyArray<Token>): Awaitable<INodePatternMatch<Node> | Falsy>
}
|
|
32
|
+
|
|
33
|
+
/**
 * Maps each element of `Sequence` to the pattern used to match it:
 * `string` (a token type) for elements extending `Token`,
 * `INodePattern<Token, Element>` for elements extending `Node`,
 * and `never` otherwise.
 */
export type MapSequenceToPatterns<
  Sequence extends ReadonlyArray<Token | Node>
, Token extends IToken = IToken
, Node extends INode = INode
> = {
  [Index in keyof Sequence]:
    // The one-element tuple binds the element type to `Element`.
    [Sequence[Index]] extends [infer Element]
    ? (
        Element extends Token
        ? string
        : Element extends Node
          ? INodePattern<Token, Element>
          : never
      )
    : never
}
|
|
49
|
+
|
|
50
|
+
/**
 * Maps each element of `Sequence` to the result of matching it:
 * `Token` for elements extending `IToken`,
 * `INodePatternMatch<Element>` for elements extending `INode`,
 * and `never` otherwise.
 */
export type MapSequenceToMatches<
  Sequence extends ReadonlyArray<Token | Node>
, Token extends IToken = IToken
, Node extends INode = INode
> = {
  [Index in keyof Sequence]:
    // The one-element tuple binds the element type to `Element`.
    [Sequence[Index]] extends [infer Element]
    ? (
        Element extends IToken
        ? Token
        : Element extends INode
          ? INodePatternMatch<Element>
          : never
      )
    : never
}
|