cddl 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lexer.ts ADDED
@@ -0,0 +1,238 @@
1
+ import { Token, Tokens } from './tokens.js';
2
+ import { isLetter, isAlphabeticCharacter, isDigit, hasSpecialNumberCharacter } from './utils.js'
3
+ import { WHITESPACE_CHARACTERS } from './constants.js';
4
+
5
/**
 * Lexer that walks a source string one UTF-16 code unit at a time and
 * produces tokens on demand via `nextToken()`.
 *
 * Cursor state:
 * - `position`     index of the character currently held in `ch`
 * - `readPosition` index of the next character `readChar()` will load
 * - `ch`           char code at `position`, or `0` once the input is exhausted
 */
export default class Lexer {
    input: string
    position: number = 0
    readPosition: number = 0
    ch: number = 0

    /**
     * @param source text to tokenize; the first character is loaded immediately
     */
    constructor (source: string) {
        this.input = source

        this.readChar()
    }

    /**
     * Advance the cursor by one character.
     * Sets `ch` to `0` when `readPosition` is at or past the end of the input.
     */
    private readChar (): void {
        this.ch = this.readPosition >= this.input.length
            ? 0
            : this.input.charCodeAt(this.readPosition)
        this.position = this.readPosition
        this.readPosition++
    }

    /**
     * Translate the cursor into a zero-based line index and a zero-based
     * offset within that line.
     *
     * The reported offset is taken two characters behind `position`
     * (NOTE(review): presumably because callers ask for the location after the
     * lexer has already read past the offending token - confirm with callers).
     * It is clamped to `0` so that a cursor within the first two characters of
     * the input never yields a negative column.
     *
     * @returns `{ line, position }`; `{ line: 0, position: 0 }` if the offset
     *          lies beyond the end of the input
     */
    getLocation (): { line: number, position: number } {
        const offset = Math.max(this.position - 2, 0)
        let lineStart = 0

        for (const [line, text] of this.input.split('\n').entries()) {
            // "+ 1" accounts for the newline that terminated this line
            const nextLineStart = lineStart + text.length + 1
            if (offset < nextLineStart) {
                return { line, position: offset - lineStart }
            }
            lineStart = nextLineStart
        }

        return { line: 0, position: 0 }
    }

    /**
     * @param lineNumber zero-based line index
     * @returns the text of that line without its trailing newline
     */
    getLine (lineNumber: number) {
        return this.input.split('\n')[lineNumber]
    }

    /**
     * Build a three-line snippet for error messages: the source line that
     * `getLocation()` points at, followed by a `^` and a `|` marker line, both
     * indented to the reported column.
     *
     * @returns the snippet, each line terminated by `\n`
     */
    getLocationInfo (): string {
        const loc = this.getLocation()
        // getLocation() never reports a negative column, so repeat() is safe
        const indent = ' '.repeat(loc.position)
        return this.getLine(loc.line) + '\n' +
            indent + '^\n' +
            indent + '|\n'
    }

    /**
     * Skip whitespace and return the next token.
     *
     * Single-character tokens, strings, comments and EOF consume their last
     * character via the trailing `readChar()`. Identifiers and numbers return
     * early because their readers already leave the cursor on the first
     * character that does not belong to them.
     *
     * @returns the next token; `Tokens.EOF` once the input is exhausted and
     *          `Tokens.ILLEGAL` (with an empty literal) for unrecognized input
     */
    nextToken (): Token {
        let token: Token
        this.skipWhitespace()

        const Literal = String.fromCharCode(this.ch)
        switch (this.ch) {
            case '='.charCodeAt(0):
                token = { Type: Tokens.ASSIGN, Literal }
                break
            case '('.charCodeAt(0):
                token = { Type: Tokens.LPAREN, Literal }
                break
            case ')'.charCodeAt(0):
                token = { Type: Tokens.RPAREN, Literal }
                break
            case '{'.charCodeAt(0):
                token = { Type: Tokens.LBRACE, Literal }
                break
            case '}'.charCodeAt(0):
                token = { Type: Tokens.RBRACE, Literal }
                break
            case '['.charCodeAt(0):
                token = { Type: Tokens.LBRACK, Literal }
                break
            case ']'.charCodeAt(0):
                token = { Type: Tokens.RBRACK, Literal }
                break
            case '<'.charCodeAt(0):
                token = { Type: Tokens.LT, Literal }
                break
            case '>'.charCodeAt(0):
                token = { Type: Tokens.GT, Literal }
                break
            case '+'.charCodeAt(0):
                token = { Type: Tokens.PLUS, Literal }
                break
            case ','.charCodeAt(0):
                token = { Type: Tokens.COMMA, Literal }
                break
            case '.'.charCodeAt(0):
                token = { Type: Tokens.DOT, Literal }
                break
            case ':'.charCodeAt(0):
                token = { Type: Tokens.COLON, Literal }
                break
            case '?'.charCodeAt(0):
                token = { Type: Tokens.QUEST, Literal }
                break
            case '/'.charCodeAt(0):
                token = { Type: Tokens.SLASH, Literal }
                break
            case '*'.charCodeAt(0):
                token = { Type: Tokens.ASTERISK, Literal }
                break
            case '^'.charCodeAt(0):
                token = { Type: Tokens.CARET, Literal }
                break
            case '#'.charCodeAt(0):
                token = { Type: Tokens.HASH, Literal }
                break
            case '~'.charCodeAt(0):
                token = { Type: Tokens.TILDE, Literal }
                break
            case '"'.charCodeAt(0):
                token = { Type: Tokens.STRING, Literal: this.readString() }
                break
            case ';'.charCodeAt(0):
                token = { Type: Tokens.COMMENT, Literal: this.readComment() }
                break
            case 0:
                token = { Type: Tokens.EOF, Literal: '' }
                break
            default: {
                if (isAlphabeticCharacter(Literal)) {
                    return { Type: Tokens.IDENT, Literal: this.readIdentifier() }
                } else if (
                    // positive number
                    isDigit(Literal) ||
                    // negative number: a minus sign directly followed by a digit
                    (this.ch === Tokens.MINUS.charCodeAt(0) && isDigit(this.input[this.readPosition]))
                ) {
                    const numberOrFloat = this.readNumberOrFloat()
                    return {
                        Type: numberOrFloat.includes(Tokens.DOT) ? Tokens.FLOAT : Tokens.NUMBER,
                        Literal: numberOrFloat
                    }
                }
                token = { Type: Tokens.ILLEGAL, Literal: '' }
            }
        }

        this.readChar()
        return token
    }

    /**
     * Read an identifier starting at the current character and leave the
     * cursor on the first character that is not part of it.
     *
     * @returns the identifier text
     */
    private readIdentifier (): string {
        const start = this.position

        // special characters that may appear inside an identifier: - _ @ . $
        const specialCharacters = [
            Tokens.MINUS.charCodeAt(0),
            Tokens.UNDERSCORE.charCodeAt(0),
            Tokens.ATSIGN.charCodeAt(0),
            Tokens.DOT.charCodeAt(0),
            Tokens.DOLLAR.charCodeAt(0)
        ]

        /**
         * an identifier can contain letters, digits and the special
         * characters listed above
         * see https://tools.ietf.org/html/draft-ietf-cbor-cddl-08#section-3.1
         */
        while (
            isLetter(String.fromCharCode(this.ch)) ||
            isDigit(String.fromCharCode(this.ch)) ||
            specialCharacters.includes(this.ch)
        ) {
            this.readChar()
        }

        return this.input.slice(start, this.position)
    }

    /**
     * Read a comment from the current character up to (not including) the next
     * newline or the end of the input.
     *
     * @returns the comment text, including the character it started on, with
     *          surrounding whitespace trimmed
     */
    private readComment (): string {
        const start = this.position

        while (this.ch && String.fromCharCode(this.ch) !== '\n') {
            this.readChar()
        }

        return this.input.slice(start, this.position).trim()
    }

    /**
     * Read a double-quoted string. Expects the cursor on the opening quote and
     * leaves it on the closing quote (or at the end of the input when the
     * string is unterminated).
     *
     * A backslash escapes the character that follows it, so `\"` does not end
     * the string. Escape sequences are kept verbatim in the returned text.
     *
     * @returns the text between the quotes, with surrounding whitespace trimmed
     */
    private readString (): string {
        const start = this.position
        const backslash = '\\'.charCodeAt(0)

        this.readChar() // eat opening "
        while (this.ch && String.fromCharCode(this.ch) !== Tokens.QUOT) {
            if (this.ch === backslash) {
                this.readChar() // eat the backslash
                if (!this.ch) {
                    break // input ended right after the backslash
                }
            }
            this.readChar() // eat any (possibly escaped) character until "
        }

        return this.input.slice(start + 1, this.position).trim()
    }

    /**
     * Read a number or float literal starting at the current character.
     *
     * @returns the literal text, trimmed
     */
    private readNumberOrFloat (): string {
        const start = this.position
        let foundSpecialCharacter = false

        /**
         * a number or float can contain
         */
        while (
            // a digit
            isDigit(String.fromCharCode(this.ch)) ||
            // a special character, e.g. ".", "x" and "b"
            hasSpecialNumberCharacter(this.ch)
        ) {
            /**
             * ensure we respect ranges, e.g. 0..10
             * two special characters in a row end the literal: step the
             * cursor indices back by one so the literal stops before the
             * first of them (`ch` itself is left untouched)
             */
            if (hasSpecialNumberCharacter(this.ch) && foundSpecialCharacter) {
                this.position--
                this.readPosition--
                break
            }

            // remembers only whether the character just consumed was special
            foundSpecialCharacter = hasSpecialNumberCharacter(this.ch)
            this.readChar() // eat any character until a non digit or a 2nd dot
        }

        return this.input.slice(start, this.position).trim()
    }

    /**
     * Advance the cursor past every character listed in
     * `WHITESPACE_CHARACTERS`.
     */
    private skipWhitespace (): void {
        while (WHITESPACE_CHARACTERS.includes(String.fromCharCode(this.ch))) {
            this.readChar()
        }
    }
}