dms-parser 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE-APACHE +15 -0
- data/LICENSE-MIT +21 -0
- data/README.md +166 -0
- data/bin/dms-encoder +234 -0
- data/lib/dms/emitter.rb +674 -0
- data/lib/dms/parser.rb +3007 -0
- data/lib/dms/tier1.rb +1750 -0
- data/lib/dms/types.rb +129 -0
- data/lib/dms.rb +161 -0
- metadata +56 -0
data/lib/dms/parser.rb
ADDED
|
@@ -0,0 +1,3007 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# DMS parser - port of the Python/Rust reference, optimized for Ruby.
|
|
4
|
+
#
|
|
5
|
+
# Hand-written recursive-descent + stateful lexer. Errors carry
|
|
6
|
+
# (line, column, message). Tables are insertion-ordered Hashes.
|
|
7
|
+
#
|
|
8
|
+
# Hot-path strategy: positions are *byte* offsets into a UTF-8 source.
|
|
9
|
+
# Inline scanners use String#getbyte (returns Integer or nil) which is
|
|
10
|
+
# allocation-free, vs. String#[] which copies a 1-char String per call.
|
|
11
|
+
# DMS structural characters (':', '+', '-', '\n', digits, ASCII keys)
|
|
12
|
+
# are all ASCII, so byte-level checks are exact for the common cases.
|
|
13
|
+
# Multi-byte content only matters in string bodies (handled via
|
|
14
|
+
# byteslice + force_encoding) and for non-ASCII bare-key chars (label-
|
|
15
|
+
# class check has a UTF-8 multi-byte fallback).
|
|
16
|
+
|
|
17
|
+
module Dms
|
|
18
|
+
class Parser
|
|
19
|
+
# ---------- Byte constants (faster than ?x.ord at call site) ----------
|
|
20
|
+
SP = 0x20
|
|
21
|
+
TAB = 0x09
|
|
22
|
+
LF = 0x0A
|
|
23
|
+
CR = 0x0D
|
|
24
|
+
HASH = 0x23 # '#'
|
|
25
|
+
SLASH = 0x2F # '/'
|
|
26
|
+
STAR = 0x2A # '*'
|
|
27
|
+
BANG = 0x21 # '!' (unused)
|
|
28
|
+
PLUS = 0x2B # '+'
|
|
29
|
+
MINUS = 0x2D # '-'
|
|
30
|
+
COLON = 0x3A # ':'
|
|
31
|
+
COMMA = 0x2C # ','
|
|
32
|
+
LBRACK = 0x5B # '['
|
|
33
|
+
RBRACK = 0x5D # ']'
|
|
34
|
+
LBRACE = 0x7B # '{'
|
|
35
|
+
RBRACE = 0x7D # '}'
|
|
36
|
+
DOT = 0x2E # '.'
|
|
37
|
+
UNDERSCORE = 0x5F # '_'
|
|
38
|
+
BACKSLASH = 0x5C # '\\'
|
|
39
|
+
DQUOTE = 0x22 # '"'
|
|
40
|
+
SQUOTE = 0x27 # "'"
|
|
41
|
+
DIGIT0 = 0x30
|
|
42
|
+
DIGIT9 = 0x39
|
|
43
|
+
LOWER_A = 0x61
|
|
44
|
+
LOWER_F = 0x66
|
|
45
|
+
LOWER_Z = 0x7A
|
|
46
|
+
UPPER_A = 0x41
|
|
47
|
+
UPPER_F = 0x46
|
|
48
|
+
UPPER_Z = 0x5A
|
|
49
|
+
LOWER_X = 0x78
|
|
50
|
+
LOWER_O = 0x6F
|
|
51
|
+
LOWER_B = 0x62
|
|
52
|
+
LOWER_E = 0x65
|
|
53
|
+
UPPER_E = 0x45
|
|
54
|
+
LOWER_P = 0x70
|
|
55
|
+
LOWER_T = 0x74
|
|
56
|
+
LOWER_N = 0x6E
|
|
57
|
+
LOWER_U = 0x75
|
|
58
|
+
UPPER_U = 0x55
|
|
59
|
+
UPPER_T = 0x54
|
|
60
|
+
UPPER_Z_LETTER = 0x5A
|
|
61
|
+
LOWER_Z_LETTER = 0x7A
|
|
62
|
+
LOWER_R = 0x72
|
|
63
|
+
LOWER_F_LETTER = 0x66
|
|
64
|
+
|
|
65
|
+
HEX_DIGITS = "0123456789abcdefABCDEF"
|
|
66
|
+
|
|
67
|
+
BUILT_IN_HEREDOC_MODIFIERS = %w[_trim _fold_paragraphs].freeze
|
|
68
|
+
|
|
69
|
+
# Bare-key membership table indexed by byte value (256 entries). An entry is
# true exactly when the byte is an ASCII digit, an ASCII letter, '_' or '-';
# every other slot is false.
BARE_KEY_BYTE = Array.new(256) do |byte|
  byte.between?(DIGIT0, DIGIT9) ||
    byte.between?(LOWER_A, LOWER_Z) ||
    byte.between?(UPPER_A, UPPER_Z) ||
    byte == UNDERSCORE ||
    byte == MINUS
end.freeze
|
|
78
|
+
|
|
79
|
+
# Reserved decorator sigils (SPEC tier-0). A body line whose first
# non-whitespace byte is flagged here is a parse error. The rule applies at
# line start only: the same characters are unrestricted inside string bodies,
# comments, and heredoc bodies. Underscore is deliberately absent from the
# set, so it stays a valid identifier-leading byte.
RESERVED_SIGIL_BYTE = Array.new(256, false).tap do |table|
  # The 17 reserved sigils, spelled as characters rather than hex codes.
  '!@$%^&*|~`.,><?;='.each_byte { |sigil| table[sigil] = true }
end.freeze
|
|
90
|
+
|
|
91
|
+
# XID_Continue snapshot, frozen at Unicode 15.1 per UAX #31 §2 default
|
|
92
|
+
# identifier syntax (XID_Continue \ Default_Ignorable_Code_Point). Embedded
|
|
93
|
+
# so accept/reject is identical across Ruby/Onigmo Unicode-data versions.
|
|
94
|
+
# 773 sorted, non-overlapping ranges; binary-searched by #xid_continue?.
|
|
95
|
+
XID_CONTINUE_RANGES = [
|
|
96
|
+
[0x00AA, 0x00AA],
|
|
97
|
+
[0x00B5, 0x00B5],
|
|
98
|
+
[0x00B7, 0x00B7],
|
|
99
|
+
[0x00BA, 0x00BA],
|
|
100
|
+
[0x00C0, 0x00D6],
|
|
101
|
+
[0x00D8, 0x00F6],
|
|
102
|
+
[0x00F8, 0x02C1],
|
|
103
|
+
[0x02C6, 0x02D1],
|
|
104
|
+
[0x02E0, 0x02E4],
|
|
105
|
+
[0x02EC, 0x02EC],
|
|
106
|
+
[0x02EE, 0x02EE],
|
|
107
|
+
[0x0300, 0x034E],
|
|
108
|
+
[0x0350, 0x0374],
|
|
109
|
+
[0x0376, 0x0377],
|
|
110
|
+
[0x037B, 0x037D],
|
|
111
|
+
[0x037F, 0x037F],
|
|
112
|
+
[0x0386, 0x038A],
|
|
113
|
+
[0x038C, 0x038C],
|
|
114
|
+
[0x038E, 0x03A1],
|
|
115
|
+
[0x03A3, 0x03F5],
|
|
116
|
+
[0x03F7, 0x0481],
|
|
117
|
+
[0x0483, 0x0487],
|
|
118
|
+
[0x048A, 0x052F],
|
|
119
|
+
[0x0531, 0x0556],
|
|
120
|
+
[0x0559, 0x0559],
|
|
121
|
+
[0x0560, 0x0588],
|
|
122
|
+
[0x0591, 0x05BD],
|
|
123
|
+
[0x05BF, 0x05BF],
|
|
124
|
+
[0x05C1, 0x05C2],
|
|
125
|
+
[0x05C4, 0x05C5],
|
|
126
|
+
[0x05C7, 0x05C7],
|
|
127
|
+
[0x05D0, 0x05EA],
|
|
128
|
+
[0x05EF, 0x05F2],
|
|
129
|
+
[0x0610, 0x061A],
|
|
130
|
+
[0x0620, 0x0669],
|
|
131
|
+
[0x066E, 0x06D3],
|
|
132
|
+
[0x06D5, 0x06DC],
|
|
133
|
+
[0x06DF, 0x06E8],
|
|
134
|
+
[0x06EA, 0x06FC],
|
|
135
|
+
[0x06FF, 0x06FF],
|
|
136
|
+
[0x0710, 0x074A],
|
|
137
|
+
[0x074D, 0x07B1],
|
|
138
|
+
[0x07C0, 0x07F5],
|
|
139
|
+
[0x07FA, 0x07FA],
|
|
140
|
+
[0x07FD, 0x07FD],
|
|
141
|
+
[0x0800, 0x082D],
|
|
142
|
+
[0x0840, 0x085B],
|
|
143
|
+
[0x0860, 0x086A],
|
|
144
|
+
[0x0870, 0x0887],
|
|
145
|
+
[0x0889, 0x088E],
|
|
146
|
+
[0x0898, 0x08E1],
|
|
147
|
+
[0x08E3, 0x0963],
|
|
148
|
+
[0x0966, 0x096F],
|
|
149
|
+
[0x0971, 0x0983],
|
|
150
|
+
[0x0985, 0x098C],
|
|
151
|
+
[0x098F, 0x0990],
|
|
152
|
+
[0x0993, 0x09A8],
|
|
153
|
+
[0x09AA, 0x09B0],
|
|
154
|
+
[0x09B2, 0x09B2],
|
|
155
|
+
[0x09B6, 0x09B9],
|
|
156
|
+
[0x09BC, 0x09C4],
|
|
157
|
+
[0x09C7, 0x09C8],
|
|
158
|
+
[0x09CB, 0x09CE],
|
|
159
|
+
[0x09D7, 0x09D7],
|
|
160
|
+
[0x09DC, 0x09DD],
|
|
161
|
+
[0x09DF, 0x09E3],
|
|
162
|
+
[0x09E6, 0x09F1],
|
|
163
|
+
[0x09FC, 0x09FC],
|
|
164
|
+
[0x09FE, 0x09FE],
|
|
165
|
+
[0x0A01, 0x0A03],
|
|
166
|
+
[0x0A05, 0x0A0A],
|
|
167
|
+
[0x0A0F, 0x0A10],
|
|
168
|
+
[0x0A13, 0x0A28],
|
|
169
|
+
[0x0A2A, 0x0A30],
|
|
170
|
+
[0x0A32, 0x0A33],
|
|
171
|
+
[0x0A35, 0x0A36],
|
|
172
|
+
[0x0A38, 0x0A39],
|
|
173
|
+
[0x0A3C, 0x0A3C],
|
|
174
|
+
[0x0A3E, 0x0A42],
|
|
175
|
+
[0x0A47, 0x0A48],
|
|
176
|
+
[0x0A4B, 0x0A4D],
|
|
177
|
+
[0x0A51, 0x0A51],
|
|
178
|
+
[0x0A59, 0x0A5C],
|
|
179
|
+
[0x0A5E, 0x0A5E],
|
|
180
|
+
[0x0A66, 0x0A75],
|
|
181
|
+
[0x0A81, 0x0A83],
|
|
182
|
+
[0x0A85, 0x0A8D],
|
|
183
|
+
[0x0A8F, 0x0A91],
|
|
184
|
+
[0x0A93, 0x0AA8],
|
|
185
|
+
[0x0AAA, 0x0AB0],
|
|
186
|
+
[0x0AB2, 0x0AB3],
|
|
187
|
+
[0x0AB5, 0x0AB9],
|
|
188
|
+
[0x0ABC, 0x0AC5],
|
|
189
|
+
[0x0AC7, 0x0AC9],
|
|
190
|
+
[0x0ACB, 0x0ACD],
|
|
191
|
+
[0x0AD0, 0x0AD0],
|
|
192
|
+
[0x0AE0, 0x0AE3],
|
|
193
|
+
[0x0AE6, 0x0AEF],
|
|
194
|
+
[0x0AF9, 0x0AFF],
|
|
195
|
+
[0x0B01, 0x0B03],
|
|
196
|
+
[0x0B05, 0x0B0C],
|
|
197
|
+
[0x0B0F, 0x0B10],
|
|
198
|
+
[0x0B13, 0x0B28],
|
|
199
|
+
[0x0B2A, 0x0B30],
|
|
200
|
+
[0x0B32, 0x0B33],
|
|
201
|
+
[0x0B35, 0x0B39],
|
|
202
|
+
[0x0B3C, 0x0B44],
|
|
203
|
+
[0x0B47, 0x0B48],
|
|
204
|
+
[0x0B4B, 0x0B4D],
|
|
205
|
+
[0x0B55, 0x0B57],
|
|
206
|
+
[0x0B5C, 0x0B5D],
|
|
207
|
+
[0x0B5F, 0x0B63],
|
|
208
|
+
[0x0B66, 0x0B6F],
|
|
209
|
+
[0x0B71, 0x0B71],
|
|
210
|
+
[0x0B82, 0x0B83],
|
|
211
|
+
[0x0B85, 0x0B8A],
|
|
212
|
+
[0x0B8E, 0x0B90],
|
|
213
|
+
[0x0B92, 0x0B95],
|
|
214
|
+
[0x0B99, 0x0B9A],
|
|
215
|
+
[0x0B9C, 0x0B9C],
|
|
216
|
+
[0x0B9E, 0x0B9F],
|
|
217
|
+
[0x0BA3, 0x0BA4],
|
|
218
|
+
[0x0BA8, 0x0BAA],
|
|
219
|
+
[0x0BAE, 0x0BB9],
|
|
220
|
+
[0x0BBE, 0x0BC2],
|
|
221
|
+
[0x0BC6, 0x0BC8],
|
|
222
|
+
[0x0BCA, 0x0BCD],
|
|
223
|
+
[0x0BD0, 0x0BD0],
|
|
224
|
+
[0x0BD7, 0x0BD7],
|
|
225
|
+
[0x0BE6, 0x0BEF],
|
|
226
|
+
[0x0C00, 0x0C0C],
|
|
227
|
+
[0x0C0E, 0x0C10],
|
|
228
|
+
[0x0C12, 0x0C28],
|
|
229
|
+
[0x0C2A, 0x0C39],
|
|
230
|
+
[0x0C3C, 0x0C44],
|
|
231
|
+
[0x0C46, 0x0C48],
|
|
232
|
+
[0x0C4A, 0x0C4D],
|
|
233
|
+
[0x0C55, 0x0C56],
|
|
234
|
+
[0x0C58, 0x0C5A],
|
|
235
|
+
[0x0C5D, 0x0C5D],
|
|
236
|
+
[0x0C60, 0x0C63],
|
|
237
|
+
[0x0C66, 0x0C6F],
|
|
238
|
+
[0x0C80, 0x0C83],
|
|
239
|
+
[0x0C85, 0x0C8C],
|
|
240
|
+
[0x0C8E, 0x0C90],
|
|
241
|
+
[0x0C92, 0x0CA8],
|
|
242
|
+
[0x0CAA, 0x0CB3],
|
|
243
|
+
[0x0CB5, 0x0CB9],
|
|
244
|
+
[0x0CBC, 0x0CC4],
|
|
245
|
+
[0x0CC6, 0x0CC8],
|
|
246
|
+
[0x0CCA, 0x0CCD],
|
|
247
|
+
[0x0CD5, 0x0CD6],
|
|
248
|
+
[0x0CDD, 0x0CDE],
|
|
249
|
+
[0x0CE0, 0x0CE3],
|
|
250
|
+
[0x0CE6, 0x0CEF],
|
|
251
|
+
[0x0CF1, 0x0CF3],
|
|
252
|
+
[0x0D00, 0x0D0C],
|
|
253
|
+
[0x0D0E, 0x0D10],
|
|
254
|
+
[0x0D12, 0x0D44],
|
|
255
|
+
[0x0D46, 0x0D48],
|
|
256
|
+
[0x0D4A, 0x0D4E],
|
|
257
|
+
[0x0D54, 0x0D57],
|
|
258
|
+
[0x0D5F, 0x0D63],
|
|
259
|
+
[0x0D66, 0x0D6F],
|
|
260
|
+
[0x0D7A, 0x0D7F],
|
|
261
|
+
[0x0D81, 0x0D83],
|
|
262
|
+
[0x0D85, 0x0D96],
|
|
263
|
+
[0x0D9A, 0x0DB1],
|
|
264
|
+
[0x0DB3, 0x0DBB],
|
|
265
|
+
[0x0DBD, 0x0DBD],
|
|
266
|
+
[0x0DC0, 0x0DC6],
|
|
267
|
+
[0x0DCA, 0x0DCA],
|
|
268
|
+
[0x0DCF, 0x0DD4],
|
|
269
|
+
[0x0DD6, 0x0DD6],
|
|
270
|
+
[0x0DD8, 0x0DDF],
|
|
271
|
+
[0x0DE6, 0x0DEF],
|
|
272
|
+
[0x0DF2, 0x0DF3],
|
|
273
|
+
[0x0E01, 0x0E3A],
|
|
274
|
+
[0x0E40, 0x0E4E],
|
|
275
|
+
[0x0E50, 0x0E59],
|
|
276
|
+
[0x0E81, 0x0E82],
|
|
277
|
+
[0x0E84, 0x0E84],
|
|
278
|
+
[0x0E86, 0x0E8A],
|
|
279
|
+
[0x0E8C, 0x0EA3],
|
|
280
|
+
[0x0EA5, 0x0EA5],
|
|
281
|
+
[0x0EA7, 0x0EBD],
|
|
282
|
+
[0x0EC0, 0x0EC4],
|
|
283
|
+
[0x0EC6, 0x0EC6],
|
|
284
|
+
[0x0EC8, 0x0ECE],
|
|
285
|
+
[0x0ED0, 0x0ED9],
|
|
286
|
+
[0x0EDC, 0x0EDF],
|
|
287
|
+
[0x0F00, 0x0F00],
|
|
288
|
+
[0x0F18, 0x0F19],
|
|
289
|
+
[0x0F20, 0x0F29],
|
|
290
|
+
[0x0F35, 0x0F35],
|
|
291
|
+
[0x0F37, 0x0F37],
|
|
292
|
+
[0x0F39, 0x0F39],
|
|
293
|
+
[0x0F3E, 0x0F47],
|
|
294
|
+
[0x0F49, 0x0F6C],
|
|
295
|
+
[0x0F71, 0x0F84],
|
|
296
|
+
[0x0F86, 0x0F97],
|
|
297
|
+
[0x0F99, 0x0FBC],
|
|
298
|
+
[0x0FC6, 0x0FC6],
|
|
299
|
+
[0x1000, 0x1049],
|
|
300
|
+
[0x1050, 0x109D],
|
|
301
|
+
[0x10A0, 0x10C5],
|
|
302
|
+
[0x10C7, 0x10C7],
|
|
303
|
+
[0x10CD, 0x10CD],
|
|
304
|
+
[0x10D0, 0x10FA],
|
|
305
|
+
[0x10FC, 0x115E],
|
|
306
|
+
[0x1161, 0x1248],
|
|
307
|
+
[0x124A, 0x124D],
|
|
308
|
+
[0x1250, 0x1256],
|
|
309
|
+
[0x1258, 0x1258],
|
|
310
|
+
[0x125A, 0x125D],
|
|
311
|
+
[0x1260, 0x1288],
|
|
312
|
+
[0x128A, 0x128D],
|
|
313
|
+
[0x1290, 0x12B0],
|
|
314
|
+
[0x12B2, 0x12B5],
|
|
315
|
+
[0x12B8, 0x12BE],
|
|
316
|
+
[0x12C0, 0x12C0],
|
|
317
|
+
[0x12C2, 0x12C5],
|
|
318
|
+
[0x12C8, 0x12D6],
|
|
319
|
+
[0x12D8, 0x1310],
|
|
320
|
+
[0x1312, 0x1315],
|
|
321
|
+
[0x1318, 0x135A],
|
|
322
|
+
[0x135D, 0x135F],
|
|
323
|
+
[0x1369, 0x1371],
|
|
324
|
+
[0x1380, 0x138F],
|
|
325
|
+
[0x13A0, 0x13F5],
|
|
326
|
+
[0x13F8, 0x13FD],
|
|
327
|
+
[0x1401, 0x166C],
|
|
328
|
+
[0x166F, 0x167F],
|
|
329
|
+
[0x1681, 0x169A],
|
|
330
|
+
[0x16A0, 0x16EA],
|
|
331
|
+
[0x16EE, 0x16F8],
|
|
332
|
+
[0x1700, 0x1715],
|
|
333
|
+
[0x171F, 0x1734],
|
|
334
|
+
[0x1740, 0x1753],
|
|
335
|
+
[0x1760, 0x176C],
|
|
336
|
+
[0x176E, 0x1770],
|
|
337
|
+
[0x1772, 0x1773],
|
|
338
|
+
[0x1780, 0x17B3],
|
|
339
|
+
[0x17B6, 0x17D3],
|
|
340
|
+
[0x17D7, 0x17D7],
|
|
341
|
+
[0x17DC, 0x17DD],
|
|
342
|
+
[0x17E0, 0x17E9],
|
|
343
|
+
[0x1810, 0x1819],
|
|
344
|
+
[0x1820, 0x1878],
|
|
345
|
+
[0x1880, 0x18AA],
|
|
346
|
+
[0x18B0, 0x18F5],
|
|
347
|
+
[0x1900, 0x191E],
|
|
348
|
+
[0x1920, 0x192B],
|
|
349
|
+
[0x1930, 0x193B],
|
|
350
|
+
[0x1946, 0x196D],
|
|
351
|
+
[0x1970, 0x1974],
|
|
352
|
+
[0x1980, 0x19AB],
|
|
353
|
+
[0x19B0, 0x19C9],
|
|
354
|
+
[0x19D0, 0x19DA],
|
|
355
|
+
[0x1A00, 0x1A1B],
|
|
356
|
+
[0x1A20, 0x1A5E],
|
|
357
|
+
[0x1A60, 0x1A7C],
|
|
358
|
+
[0x1A7F, 0x1A89],
|
|
359
|
+
[0x1A90, 0x1A99],
|
|
360
|
+
[0x1AA7, 0x1AA7],
|
|
361
|
+
[0x1AB0, 0x1ABD],
|
|
362
|
+
[0x1ABF, 0x1ACE],
|
|
363
|
+
[0x1B00, 0x1B4C],
|
|
364
|
+
[0x1B50, 0x1B59],
|
|
365
|
+
[0x1B6B, 0x1B73],
|
|
366
|
+
[0x1B80, 0x1BF3],
|
|
367
|
+
[0x1C00, 0x1C37],
|
|
368
|
+
[0x1C40, 0x1C49],
|
|
369
|
+
[0x1C4D, 0x1C7D],
|
|
370
|
+
[0x1C80, 0x1C88],
|
|
371
|
+
[0x1C90, 0x1CBA],
|
|
372
|
+
[0x1CBD, 0x1CBF],
|
|
373
|
+
[0x1CD0, 0x1CD2],
|
|
374
|
+
[0x1CD4, 0x1CFA],
|
|
375
|
+
[0x1D00, 0x1F15],
|
|
376
|
+
[0x1F18, 0x1F1D],
|
|
377
|
+
[0x1F20, 0x1F45],
|
|
378
|
+
[0x1F48, 0x1F4D],
|
|
379
|
+
[0x1F50, 0x1F57],
|
|
380
|
+
[0x1F59, 0x1F59],
|
|
381
|
+
[0x1F5B, 0x1F5B],
|
|
382
|
+
[0x1F5D, 0x1F5D],
|
|
383
|
+
[0x1F5F, 0x1F7D],
|
|
384
|
+
[0x1F80, 0x1FB4],
|
|
385
|
+
[0x1FB6, 0x1FBC],
|
|
386
|
+
[0x1FBE, 0x1FBE],
|
|
387
|
+
[0x1FC2, 0x1FC4],
|
|
388
|
+
[0x1FC6, 0x1FCC],
|
|
389
|
+
[0x1FD0, 0x1FD3],
|
|
390
|
+
[0x1FD6, 0x1FDB],
|
|
391
|
+
[0x1FE0, 0x1FEC],
|
|
392
|
+
[0x1FF2, 0x1FF4],
|
|
393
|
+
[0x1FF6, 0x1FFC],
|
|
394
|
+
[0x203F, 0x2040],
|
|
395
|
+
[0x2054, 0x2054],
|
|
396
|
+
[0x2071, 0x2071],
|
|
397
|
+
[0x207F, 0x207F],
|
|
398
|
+
[0x2090, 0x209C],
|
|
399
|
+
[0x20D0, 0x20DC],
|
|
400
|
+
[0x20E1, 0x20E1],
|
|
401
|
+
[0x20E5, 0x20F0],
|
|
402
|
+
[0x2102, 0x2102],
|
|
403
|
+
[0x2107, 0x2107],
|
|
404
|
+
[0x210A, 0x2113],
|
|
405
|
+
[0x2115, 0x2115],
|
|
406
|
+
[0x2118, 0x211D],
|
|
407
|
+
[0x2124, 0x2124],
|
|
408
|
+
[0x2126, 0x2126],
|
|
409
|
+
[0x2128, 0x2128],
|
|
410
|
+
[0x212A, 0x2139],
|
|
411
|
+
[0x213C, 0x213F],
|
|
412
|
+
[0x2145, 0x2149],
|
|
413
|
+
[0x214E, 0x214E],
|
|
414
|
+
[0x2160, 0x2188],
|
|
415
|
+
[0x2C00, 0x2CE4],
|
|
416
|
+
[0x2CEB, 0x2CF3],
|
|
417
|
+
[0x2D00, 0x2D25],
|
|
418
|
+
[0x2D27, 0x2D27],
|
|
419
|
+
[0x2D2D, 0x2D2D],
|
|
420
|
+
[0x2D30, 0x2D67],
|
|
421
|
+
[0x2D6F, 0x2D6F],
|
|
422
|
+
[0x2D7F, 0x2D96],
|
|
423
|
+
[0x2DA0, 0x2DA6],
|
|
424
|
+
[0x2DA8, 0x2DAE],
|
|
425
|
+
[0x2DB0, 0x2DB6],
|
|
426
|
+
[0x2DB8, 0x2DBE],
|
|
427
|
+
[0x2DC0, 0x2DC6],
|
|
428
|
+
[0x2DC8, 0x2DCE],
|
|
429
|
+
[0x2DD0, 0x2DD6],
|
|
430
|
+
[0x2DD8, 0x2DDE],
|
|
431
|
+
[0x2DE0, 0x2DFF],
|
|
432
|
+
[0x3005, 0x3007],
|
|
433
|
+
[0x3021, 0x302F],
|
|
434
|
+
[0x3031, 0x3035],
|
|
435
|
+
[0x3038, 0x303C],
|
|
436
|
+
[0x3041, 0x3096],
|
|
437
|
+
[0x3099, 0x309A],
|
|
438
|
+
[0x309D, 0x309F],
|
|
439
|
+
[0x30A1, 0x30FF],
|
|
440
|
+
[0x3105, 0x312F],
|
|
441
|
+
[0x3131, 0x3163],
|
|
442
|
+
[0x3165, 0x318E],
|
|
443
|
+
[0x31A0, 0x31BF],
|
|
444
|
+
[0x31F0, 0x31FF],
|
|
445
|
+
[0x3400, 0x4DBF],
|
|
446
|
+
[0x4E00, 0xA48C],
|
|
447
|
+
[0xA4D0, 0xA4FD],
|
|
448
|
+
[0xA500, 0xA60C],
|
|
449
|
+
[0xA610, 0xA62B],
|
|
450
|
+
[0xA640, 0xA66F],
|
|
451
|
+
[0xA674, 0xA67D],
|
|
452
|
+
[0xA67F, 0xA6F1],
|
|
453
|
+
[0xA717, 0xA71F],
|
|
454
|
+
[0xA722, 0xA788],
|
|
455
|
+
[0xA78B, 0xA7CA],
|
|
456
|
+
[0xA7D0, 0xA7D1],
|
|
457
|
+
[0xA7D3, 0xA7D3],
|
|
458
|
+
[0xA7D5, 0xA7D9],
|
|
459
|
+
[0xA7F2, 0xA827],
|
|
460
|
+
[0xA82C, 0xA82C],
|
|
461
|
+
[0xA840, 0xA873],
|
|
462
|
+
[0xA880, 0xA8C5],
|
|
463
|
+
[0xA8D0, 0xA8D9],
|
|
464
|
+
[0xA8E0, 0xA8F7],
|
|
465
|
+
[0xA8FB, 0xA8FB],
|
|
466
|
+
[0xA8FD, 0xA92D],
|
|
467
|
+
[0xA930, 0xA953],
|
|
468
|
+
[0xA960, 0xA97C],
|
|
469
|
+
[0xA980, 0xA9C0],
|
|
470
|
+
[0xA9CF, 0xA9D9],
|
|
471
|
+
[0xA9E0, 0xA9FE],
|
|
472
|
+
[0xAA00, 0xAA36],
|
|
473
|
+
[0xAA40, 0xAA4D],
|
|
474
|
+
[0xAA50, 0xAA59],
|
|
475
|
+
[0xAA60, 0xAA76],
|
|
476
|
+
[0xAA7A, 0xAAC2],
|
|
477
|
+
[0xAADB, 0xAADD],
|
|
478
|
+
[0xAAE0, 0xAAEF],
|
|
479
|
+
[0xAAF2, 0xAAF6],
|
|
480
|
+
[0xAB01, 0xAB06],
|
|
481
|
+
[0xAB09, 0xAB0E],
|
|
482
|
+
[0xAB11, 0xAB16],
|
|
483
|
+
[0xAB20, 0xAB26],
|
|
484
|
+
[0xAB28, 0xAB2E],
|
|
485
|
+
[0xAB30, 0xAB5A],
|
|
486
|
+
[0xAB5C, 0xAB69],
|
|
487
|
+
[0xAB70, 0xABEA],
|
|
488
|
+
[0xABEC, 0xABED],
|
|
489
|
+
[0xABF0, 0xABF9],
|
|
490
|
+
[0xAC00, 0xD7A3],
|
|
491
|
+
[0xD7B0, 0xD7C6],
|
|
492
|
+
[0xD7CB, 0xD7FB],
|
|
493
|
+
[0xF900, 0xFA6D],
|
|
494
|
+
[0xFA70, 0xFAD9],
|
|
495
|
+
[0xFB00, 0xFB06],
|
|
496
|
+
[0xFB13, 0xFB17],
|
|
497
|
+
[0xFB1D, 0xFB28],
|
|
498
|
+
[0xFB2A, 0xFB36],
|
|
499
|
+
[0xFB38, 0xFB3C],
|
|
500
|
+
[0xFB3E, 0xFB3E],
|
|
501
|
+
[0xFB40, 0xFB41],
|
|
502
|
+
[0xFB43, 0xFB44],
|
|
503
|
+
[0xFB46, 0xFBB1],
|
|
504
|
+
[0xFBD3, 0xFC5D],
|
|
505
|
+
[0xFC64, 0xFD3D],
|
|
506
|
+
[0xFD50, 0xFD8F],
|
|
507
|
+
[0xFD92, 0xFDC7],
|
|
508
|
+
[0xFDF0, 0xFDF9],
|
|
509
|
+
[0xFE20, 0xFE2F],
|
|
510
|
+
[0xFE33, 0xFE34],
|
|
511
|
+
[0xFE4D, 0xFE4F],
|
|
512
|
+
[0xFE71, 0xFE71],
|
|
513
|
+
[0xFE73, 0xFE73],
|
|
514
|
+
[0xFE77, 0xFE77],
|
|
515
|
+
[0xFE79, 0xFE79],
|
|
516
|
+
[0xFE7B, 0xFE7B],
|
|
517
|
+
[0xFE7D, 0xFE7D],
|
|
518
|
+
[0xFE7F, 0xFEFC],
|
|
519
|
+
[0xFF10, 0xFF19],
|
|
520
|
+
[0xFF21, 0xFF3A],
|
|
521
|
+
[0xFF3F, 0xFF3F],
|
|
522
|
+
[0xFF41, 0xFF5A],
|
|
523
|
+
[0xFF65, 0xFF9F],
|
|
524
|
+
[0xFFA1, 0xFFBE],
|
|
525
|
+
[0xFFC2, 0xFFC7],
|
|
526
|
+
[0xFFCA, 0xFFCF],
|
|
527
|
+
[0xFFD2, 0xFFD7],
|
|
528
|
+
[0xFFDA, 0xFFDC],
|
|
529
|
+
[0x10000, 0x1000B],
|
|
530
|
+
[0x1000D, 0x10026],
|
|
531
|
+
[0x10028, 0x1003A],
|
|
532
|
+
[0x1003C, 0x1003D],
|
|
533
|
+
[0x1003F, 0x1004D],
|
|
534
|
+
[0x10050, 0x1005D],
|
|
535
|
+
[0x10080, 0x100FA],
|
|
536
|
+
[0x10140, 0x10174],
|
|
537
|
+
[0x101FD, 0x101FD],
|
|
538
|
+
[0x10280, 0x1029C],
|
|
539
|
+
[0x102A0, 0x102D0],
|
|
540
|
+
[0x102E0, 0x102E0],
|
|
541
|
+
[0x10300, 0x1031F],
|
|
542
|
+
[0x1032D, 0x1034A],
|
|
543
|
+
[0x10350, 0x1037A],
|
|
544
|
+
[0x10380, 0x1039D],
|
|
545
|
+
[0x103A0, 0x103C3],
|
|
546
|
+
[0x103C8, 0x103CF],
|
|
547
|
+
[0x103D1, 0x103D5],
|
|
548
|
+
[0x10400, 0x1049D],
|
|
549
|
+
[0x104A0, 0x104A9],
|
|
550
|
+
[0x104B0, 0x104D3],
|
|
551
|
+
[0x104D8, 0x104FB],
|
|
552
|
+
[0x10500, 0x10527],
|
|
553
|
+
[0x10530, 0x10563],
|
|
554
|
+
[0x10570, 0x1057A],
|
|
555
|
+
[0x1057C, 0x1058A],
|
|
556
|
+
[0x1058C, 0x10592],
|
|
557
|
+
[0x10594, 0x10595],
|
|
558
|
+
[0x10597, 0x105A1],
|
|
559
|
+
[0x105A3, 0x105B1],
|
|
560
|
+
[0x105B3, 0x105B9],
|
|
561
|
+
[0x105BB, 0x105BC],
|
|
562
|
+
[0x10600, 0x10736],
|
|
563
|
+
[0x10740, 0x10755],
|
|
564
|
+
[0x10760, 0x10767],
|
|
565
|
+
[0x10780, 0x10785],
|
|
566
|
+
[0x10787, 0x107B0],
|
|
567
|
+
[0x107B2, 0x107BA],
|
|
568
|
+
[0x10800, 0x10805],
|
|
569
|
+
[0x10808, 0x10808],
|
|
570
|
+
[0x1080A, 0x10835],
|
|
571
|
+
[0x10837, 0x10838],
|
|
572
|
+
[0x1083C, 0x1083C],
|
|
573
|
+
[0x1083F, 0x10855],
|
|
574
|
+
[0x10860, 0x10876],
|
|
575
|
+
[0x10880, 0x1089E],
|
|
576
|
+
[0x108E0, 0x108F2],
|
|
577
|
+
[0x108F4, 0x108F5],
|
|
578
|
+
[0x10900, 0x10915],
|
|
579
|
+
[0x10920, 0x10939],
|
|
580
|
+
[0x10980, 0x109B7],
|
|
581
|
+
[0x109BE, 0x109BF],
|
|
582
|
+
[0x10A00, 0x10A03],
|
|
583
|
+
[0x10A05, 0x10A06],
|
|
584
|
+
[0x10A0C, 0x10A13],
|
|
585
|
+
[0x10A15, 0x10A17],
|
|
586
|
+
[0x10A19, 0x10A35],
|
|
587
|
+
[0x10A38, 0x10A3A],
|
|
588
|
+
[0x10A3F, 0x10A3F],
|
|
589
|
+
[0x10A60, 0x10A7C],
|
|
590
|
+
[0x10A80, 0x10A9C],
|
|
591
|
+
[0x10AC0, 0x10AC7],
|
|
592
|
+
[0x10AC9, 0x10AE6],
|
|
593
|
+
[0x10B00, 0x10B35],
|
|
594
|
+
[0x10B40, 0x10B55],
|
|
595
|
+
[0x10B60, 0x10B72],
|
|
596
|
+
[0x10B80, 0x10B91],
|
|
597
|
+
[0x10C00, 0x10C48],
|
|
598
|
+
[0x10C80, 0x10CB2],
|
|
599
|
+
[0x10CC0, 0x10CF2],
|
|
600
|
+
[0x10D00, 0x10D27],
|
|
601
|
+
[0x10D30, 0x10D39],
|
|
602
|
+
[0x10E80, 0x10EA9],
|
|
603
|
+
[0x10EAB, 0x10EAC],
|
|
604
|
+
[0x10EB0, 0x10EB1],
|
|
605
|
+
[0x10EFD, 0x10F1C],
|
|
606
|
+
[0x10F27, 0x10F27],
|
|
607
|
+
[0x10F30, 0x10F50],
|
|
608
|
+
[0x10F70, 0x10F85],
|
|
609
|
+
[0x10FB0, 0x10FC4],
|
|
610
|
+
[0x10FE0, 0x10FF6],
|
|
611
|
+
[0x11000, 0x11046],
|
|
612
|
+
[0x11066, 0x11075],
|
|
613
|
+
[0x1107F, 0x110BA],
|
|
614
|
+
[0x110C2, 0x110C2],
|
|
615
|
+
[0x110D0, 0x110E8],
|
|
616
|
+
[0x110F0, 0x110F9],
|
|
617
|
+
[0x11100, 0x11134],
|
|
618
|
+
[0x11136, 0x1113F],
|
|
619
|
+
[0x11144, 0x11147],
|
|
620
|
+
[0x11150, 0x11173],
|
|
621
|
+
[0x11176, 0x11176],
|
|
622
|
+
[0x11180, 0x111C4],
|
|
623
|
+
[0x111C9, 0x111CC],
|
|
624
|
+
[0x111CE, 0x111DA],
|
|
625
|
+
[0x111DC, 0x111DC],
|
|
626
|
+
[0x11200, 0x11211],
|
|
627
|
+
[0x11213, 0x11237],
|
|
628
|
+
[0x1123E, 0x11241],
|
|
629
|
+
[0x11280, 0x11286],
|
|
630
|
+
[0x11288, 0x11288],
|
|
631
|
+
[0x1128A, 0x1128D],
|
|
632
|
+
[0x1128F, 0x1129D],
|
|
633
|
+
[0x1129F, 0x112A8],
|
|
634
|
+
[0x112B0, 0x112EA],
|
|
635
|
+
[0x112F0, 0x112F9],
|
|
636
|
+
[0x11300, 0x11303],
|
|
637
|
+
[0x11305, 0x1130C],
|
|
638
|
+
[0x1130F, 0x11310],
|
|
639
|
+
[0x11313, 0x11328],
|
|
640
|
+
[0x1132A, 0x11330],
|
|
641
|
+
[0x11332, 0x11333],
|
|
642
|
+
[0x11335, 0x11339],
|
|
643
|
+
[0x1133B, 0x11344],
|
|
644
|
+
[0x11347, 0x11348],
|
|
645
|
+
[0x1134B, 0x1134D],
|
|
646
|
+
[0x11350, 0x11350],
|
|
647
|
+
[0x11357, 0x11357],
|
|
648
|
+
[0x1135D, 0x11363],
|
|
649
|
+
[0x11366, 0x1136C],
|
|
650
|
+
[0x11370, 0x11374],
|
|
651
|
+
[0x11400, 0x1144A],
|
|
652
|
+
[0x11450, 0x11459],
|
|
653
|
+
[0x1145E, 0x11461],
|
|
654
|
+
[0x11480, 0x114C5],
|
|
655
|
+
[0x114C7, 0x114C7],
|
|
656
|
+
[0x114D0, 0x114D9],
|
|
657
|
+
[0x11580, 0x115B5],
|
|
658
|
+
[0x115B8, 0x115C0],
|
|
659
|
+
[0x115D8, 0x115DD],
|
|
660
|
+
[0x11600, 0x11640],
|
|
661
|
+
[0x11644, 0x11644],
|
|
662
|
+
[0x11650, 0x11659],
|
|
663
|
+
[0x11680, 0x116B8],
|
|
664
|
+
[0x116C0, 0x116C9],
|
|
665
|
+
[0x11700, 0x1171A],
|
|
666
|
+
[0x1171D, 0x1172B],
|
|
667
|
+
[0x11730, 0x11739],
|
|
668
|
+
[0x11740, 0x11746],
|
|
669
|
+
[0x11800, 0x1183A],
|
|
670
|
+
[0x118A0, 0x118E9],
|
|
671
|
+
[0x118FF, 0x11906],
|
|
672
|
+
[0x11909, 0x11909],
|
|
673
|
+
[0x1190C, 0x11913],
|
|
674
|
+
[0x11915, 0x11916],
|
|
675
|
+
[0x11918, 0x11935],
|
|
676
|
+
[0x11937, 0x11938],
|
|
677
|
+
[0x1193B, 0x11943],
|
|
678
|
+
[0x11950, 0x11959],
|
|
679
|
+
[0x119A0, 0x119A7],
|
|
680
|
+
[0x119AA, 0x119D7],
|
|
681
|
+
[0x119DA, 0x119E1],
|
|
682
|
+
[0x119E3, 0x119E4],
|
|
683
|
+
[0x11A00, 0x11A3E],
|
|
684
|
+
[0x11A47, 0x11A47],
|
|
685
|
+
[0x11A50, 0x11A99],
|
|
686
|
+
[0x11A9D, 0x11A9D],
|
|
687
|
+
[0x11AB0, 0x11AF8],
|
|
688
|
+
[0x11C00, 0x11C08],
|
|
689
|
+
[0x11C0A, 0x11C36],
|
|
690
|
+
[0x11C38, 0x11C40],
|
|
691
|
+
[0x11C50, 0x11C59],
|
|
692
|
+
[0x11C72, 0x11C8F],
|
|
693
|
+
[0x11C92, 0x11CA7],
|
|
694
|
+
[0x11CA9, 0x11CB6],
|
|
695
|
+
[0x11D00, 0x11D06],
|
|
696
|
+
[0x11D08, 0x11D09],
|
|
697
|
+
[0x11D0B, 0x11D36],
|
|
698
|
+
[0x11D3A, 0x11D3A],
|
|
699
|
+
[0x11D3C, 0x11D3D],
|
|
700
|
+
[0x11D3F, 0x11D47],
|
|
701
|
+
[0x11D50, 0x11D59],
|
|
702
|
+
[0x11D60, 0x11D65],
|
|
703
|
+
[0x11D67, 0x11D68],
|
|
704
|
+
[0x11D6A, 0x11D8E],
|
|
705
|
+
[0x11D90, 0x11D91],
|
|
706
|
+
[0x11D93, 0x11D98],
|
|
707
|
+
[0x11DA0, 0x11DA9],
|
|
708
|
+
[0x11EE0, 0x11EF6],
|
|
709
|
+
[0x11F00, 0x11F10],
|
|
710
|
+
[0x11F12, 0x11F3A],
|
|
711
|
+
[0x11F3E, 0x11F42],
|
|
712
|
+
[0x11F50, 0x11F59],
|
|
713
|
+
[0x11FB0, 0x11FB0],
|
|
714
|
+
[0x12000, 0x12399],
|
|
715
|
+
[0x12400, 0x1246E],
|
|
716
|
+
[0x12480, 0x12543],
|
|
717
|
+
[0x12F90, 0x12FF0],
|
|
718
|
+
[0x13000, 0x1342F],
|
|
719
|
+
[0x13440, 0x13455],
|
|
720
|
+
[0x14400, 0x14646],
|
|
721
|
+
[0x16800, 0x16A38],
|
|
722
|
+
[0x16A40, 0x16A5E],
|
|
723
|
+
[0x16A60, 0x16A69],
|
|
724
|
+
[0x16A70, 0x16ABE],
|
|
725
|
+
[0x16AC0, 0x16AC9],
|
|
726
|
+
[0x16AD0, 0x16AED],
|
|
727
|
+
[0x16AF0, 0x16AF4],
|
|
728
|
+
[0x16B00, 0x16B36],
|
|
729
|
+
[0x16B40, 0x16B43],
|
|
730
|
+
[0x16B50, 0x16B59],
|
|
731
|
+
[0x16B63, 0x16B77],
|
|
732
|
+
[0x16B7D, 0x16B8F],
|
|
733
|
+
[0x16E40, 0x16E7F],
|
|
734
|
+
[0x16F00, 0x16F4A],
|
|
735
|
+
[0x16F4F, 0x16F87],
|
|
736
|
+
[0x16F8F, 0x16F9F],
|
|
737
|
+
[0x16FE0, 0x16FE1],
|
|
738
|
+
[0x16FE3, 0x16FE4],
|
|
739
|
+
[0x16FF0, 0x16FF1],
|
|
740
|
+
[0x17000, 0x187F7],
|
|
741
|
+
[0x18800, 0x18CD5],
|
|
742
|
+
[0x18D00, 0x18D08],
|
|
743
|
+
[0x1AFF0, 0x1AFF3],
|
|
744
|
+
[0x1AFF5, 0x1AFFB],
|
|
745
|
+
[0x1AFFD, 0x1AFFE],
|
|
746
|
+
[0x1B000, 0x1B122],
|
|
747
|
+
[0x1B132, 0x1B132],
|
|
748
|
+
[0x1B150, 0x1B152],
|
|
749
|
+
[0x1B155, 0x1B155],
|
|
750
|
+
[0x1B164, 0x1B167],
|
|
751
|
+
[0x1B170, 0x1B2FB],
|
|
752
|
+
[0x1BC00, 0x1BC6A],
|
|
753
|
+
[0x1BC70, 0x1BC7C],
|
|
754
|
+
[0x1BC80, 0x1BC88],
|
|
755
|
+
[0x1BC90, 0x1BC99],
|
|
756
|
+
[0x1BC9D, 0x1BC9E],
|
|
757
|
+
[0x1CF00, 0x1CF2D],
|
|
758
|
+
[0x1CF30, 0x1CF46],
|
|
759
|
+
[0x1D165, 0x1D169],
|
|
760
|
+
[0x1D16D, 0x1D172],
|
|
761
|
+
[0x1D17B, 0x1D182],
|
|
762
|
+
[0x1D185, 0x1D18B],
|
|
763
|
+
[0x1D1AA, 0x1D1AD],
|
|
764
|
+
[0x1D242, 0x1D244],
|
|
765
|
+
[0x1D400, 0x1D454],
|
|
766
|
+
[0x1D456, 0x1D49C],
|
|
767
|
+
[0x1D49E, 0x1D49F],
|
|
768
|
+
[0x1D4A2, 0x1D4A2],
|
|
769
|
+
[0x1D4A5, 0x1D4A6],
|
|
770
|
+
[0x1D4A9, 0x1D4AC],
|
|
771
|
+
[0x1D4AE, 0x1D4B9],
|
|
772
|
+
[0x1D4BB, 0x1D4BB],
|
|
773
|
+
[0x1D4BD, 0x1D4C3],
|
|
774
|
+
[0x1D4C5, 0x1D505],
|
|
775
|
+
[0x1D507, 0x1D50A],
|
|
776
|
+
[0x1D50D, 0x1D514],
|
|
777
|
+
[0x1D516, 0x1D51C],
|
|
778
|
+
[0x1D51E, 0x1D539],
|
|
779
|
+
[0x1D53B, 0x1D53E],
|
|
780
|
+
[0x1D540, 0x1D544],
|
|
781
|
+
[0x1D546, 0x1D546],
|
|
782
|
+
[0x1D54A, 0x1D550],
|
|
783
|
+
[0x1D552, 0x1D6A5],
|
|
784
|
+
[0x1D6A8, 0x1D6C0],
|
|
785
|
+
[0x1D6C2, 0x1D6DA],
|
|
786
|
+
[0x1D6DC, 0x1D6FA],
|
|
787
|
+
[0x1D6FC, 0x1D714],
|
|
788
|
+
[0x1D716, 0x1D734],
|
|
789
|
+
[0x1D736, 0x1D74E],
|
|
790
|
+
[0x1D750, 0x1D76E],
|
|
791
|
+
[0x1D770, 0x1D788],
|
|
792
|
+
[0x1D78A, 0x1D7A8],
|
|
793
|
+
[0x1D7AA, 0x1D7C2],
|
|
794
|
+
[0x1D7C4, 0x1D7CB],
|
|
795
|
+
[0x1D7CE, 0x1D7FF],
|
|
796
|
+
[0x1DA00, 0x1DA36],
|
|
797
|
+
[0x1DA3B, 0x1DA6C],
|
|
798
|
+
[0x1DA75, 0x1DA75],
|
|
799
|
+
[0x1DA84, 0x1DA84],
|
|
800
|
+
[0x1DA9B, 0x1DA9F],
|
|
801
|
+
[0x1DAA1, 0x1DAAF],
|
|
802
|
+
[0x1DF00, 0x1DF1E],
|
|
803
|
+
[0x1DF25, 0x1DF2A],
|
|
804
|
+
[0x1E000, 0x1E006],
|
|
805
|
+
[0x1E008, 0x1E018],
|
|
806
|
+
[0x1E01B, 0x1E021],
|
|
807
|
+
[0x1E023, 0x1E024],
|
|
808
|
+
[0x1E026, 0x1E02A],
|
|
809
|
+
[0x1E030, 0x1E06D],
|
|
810
|
+
[0x1E08F, 0x1E08F],
|
|
811
|
+
[0x1E100, 0x1E12C],
|
|
812
|
+
[0x1E130, 0x1E13D],
|
|
813
|
+
[0x1E140, 0x1E149],
|
|
814
|
+
[0x1E14E, 0x1E14E],
|
|
815
|
+
[0x1E290, 0x1E2AE],
|
|
816
|
+
[0x1E2C0, 0x1E2F9],
|
|
817
|
+
[0x1E4D0, 0x1E4F9],
|
|
818
|
+
[0x1E7E0, 0x1E7E6],
|
|
819
|
+
[0x1E7E8, 0x1E7EB],
|
|
820
|
+
[0x1E7ED, 0x1E7EE],
|
|
821
|
+
[0x1E7F0, 0x1E7FE],
|
|
822
|
+
[0x1E800, 0x1E8C4],
|
|
823
|
+
[0x1E8D0, 0x1E8D6],
|
|
824
|
+
[0x1E900, 0x1E94B],
|
|
825
|
+
[0x1E950, 0x1E959],
|
|
826
|
+
[0x1EE00, 0x1EE03],
|
|
827
|
+
[0x1EE05, 0x1EE1F],
|
|
828
|
+
[0x1EE21, 0x1EE22],
|
|
829
|
+
[0x1EE24, 0x1EE24],
|
|
830
|
+
[0x1EE27, 0x1EE27],
|
|
831
|
+
[0x1EE29, 0x1EE32],
|
|
832
|
+
[0x1EE34, 0x1EE37],
|
|
833
|
+
[0x1EE39, 0x1EE39],
|
|
834
|
+
[0x1EE3B, 0x1EE3B],
|
|
835
|
+
[0x1EE42, 0x1EE42],
|
|
836
|
+
[0x1EE47, 0x1EE47],
|
|
837
|
+
[0x1EE49, 0x1EE49],
|
|
838
|
+
[0x1EE4B, 0x1EE4B],
|
|
839
|
+
[0x1EE4D, 0x1EE4F],
|
|
840
|
+
[0x1EE51, 0x1EE52],
|
|
841
|
+
[0x1EE54, 0x1EE54],
|
|
842
|
+
[0x1EE57, 0x1EE57],
|
|
843
|
+
[0x1EE59, 0x1EE59],
|
|
844
|
+
[0x1EE5B, 0x1EE5B],
|
|
845
|
+
[0x1EE5D, 0x1EE5D],
|
|
846
|
+
[0x1EE5F, 0x1EE5F],
|
|
847
|
+
[0x1EE61, 0x1EE62],
|
|
848
|
+
[0x1EE64, 0x1EE64],
|
|
849
|
+
[0x1EE67, 0x1EE6A],
|
|
850
|
+
[0x1EE6C, 0x1EE72],
|
|
851
|
+
[0x1EE74, 0x1EE77],
|
|
852
|
+
[0x1EE79, 0x1EE7C],
|
|
853
|
+
[0x1EE7E, 0x1EE7E],
|
|
854
|
+
[0x1EE80, 0x1EE89],
|
|
855
|
+
[0x1EE8B, 0x1EE9B],
|
|
856
|
+
[0x1EEA1, 0x1EEA3],
|
|
857
|
+
[0x1EEA5, 0x1EEA9],
|
|
858
|
+
[0x1EEAB, 0x1EEBB],
|
|
859
|
+
[0x1FBF0, 0x1FBF9],
|
|
860
|
+
[0x20000, 0x2A6DF],
|
|
861
|
+
[0x2A700, 0x2B739],
|
|
862
|
+
[0x2B740, 0x2B81D],
|
|
863
|
+
[0x2B820, 0x2CEA1],
|
|
864
|
+
[0x2CEB0, 0x2EBE0],
|
|
865
|
+
[0x2EBF0, 0x2EE5D],
|
|
866
|
+
[0x2F800, 0x2FA1D],
|
|
867
|
+
[0x30000, 0x3134A],
|
|
868
|
+
[0x31350, 0x323AF],
|
|
869
|
+
].freeze
|
|
870
|
+
|
|
871
|
+
# Label-start table (heredoc labels and modifier names): true for '_' and
# for ASCII letters, false for every other byte.
LABEL_START_BYTE = Array.new(256) do |byte|
  byte == UNDERSCORE ||
    byte.between?(LOWER_A, LOWER_Z) ||
    byte.between?(UPPER_A, UPPER_Z)
end.freeze
|
|
877
|
+
|
|
878
|
+
# Label-continuation table: true for ASCII letters, ASCII digits and '_'.
LABEL_CONT_BYTE = Array.new(256) do |byte|
  case byte
  when LOWER_A..LOWER_Z, UPPER_A..UPPER_Z, DIGIT0..DIGIT9, UNDERSCORE then true
  else false
  end
end.freeze
|
|
885
|
+
|
|
886
|
+
# ASCII digit table: true only for the bytes '0' through '9'.
DIGIT_BYTE = Array.new(256) { |byte| byte.between?(DIGIT0, DIGIT9) }.freeze
|
|
890
|
+
|
|
891
|
+
# Hex digit table: true for '0'-'9', 'a'-'f' and 'A'-'F'.
HEX_BYTE = Array.new(256) do |byte|
  case byte
  when DIGIT0..DIGIT9, LOWER_A..LOWER_F, UPPER_A..UPPER_F then true
  else false
  end
end.freeze
|
|
897
|
+
|
|
898
|
+
# Value-terminator table: true for the bytes that end a value (whitespace,
# end-of-line bytes, comment openers, and flow-end markers).
VALUE_TERMINATOR_BYTE = Array.new(256) do |byte|
  case byte
  when SP, TAB, LF, CR, HASH, SLASH, COMMA, RBRACK, RBRACE then true
  else false
  end
end.freeze
|
|
902
|
+
|
|
903
|
+
# ---------- Public entry ----------
|
|
904
|
+
|
|
905
|
+
# Default entry point: parses +src+ with lite mode off and table order kept.
# Returns whatever _parse_document_with_mode returns for those flags.
def self.parse_document(src)
  lite = false
  ignore_order = false
  _parse_document_with_mode(src, lite, ignore_order)
end
|
|
908
|
+
|
|
909
|
+
# Lite-mode parse: yields the same data tree as the full parse, but with no
# comment AST and no original_forms, so the result is not suitable for a
# to_dms round-trip. SPEC §Parsing modes — full and lite.
def self.parse_lite_document(src)
  lite = true
  ignore_order = false
  _parse_document_with_mode(src, lite, ignore_order)
end
|
|
914
|
+
|
|
915
|
+
# Unordered full-mode parse (SPEC §"Unordered tables"). Every body `Hash`
# is replaced by an `UnorderedHash`, so iteration order is arbitrary.
# Comments and original_forms are still recorded, but `Dms.encode` will
# refuse to round-trip the result — use `Dms.encode_lite` for canonical
# emit instead.
def self.parse_document_unordered(src)
  lite = false
  ignore_order = true
  _parse_document_with_mode(src, lite, ignore_order)
end
|
|
923
|
+
|
|
924
|
+
# Unordered lite-mode parse (SPEC §"Unordered tables"). The
# `(unordered, lite)` combination is the fastest read-only path for ports
# that ship a hash-only backing.
def self.parse_lite_document_unordered(src)
  lite = true
  ignore_order = true
  _parse_document_with_mode(src, lite, ignore_order)
end
|
|
930
|
+
|
|
931
|
+
# Shared driver behind the public parse entry points.
#
# Brings `src` to UTF-8, rejects a leading BOM and any NUL, NFC-normalizes
# non-ASCII input, then runs the front-matter and body parsers and wraps
# the results in a Document.
#
# src          - String holding the DMS source. Never mutated.
# lite         - when true, forwarded to the parser as `lite:`.
# ignore_order - when true, forwarded to the parser as `ignore_order:`.
#
# Returns a Document. Raises DecodeError for a BOM at offset 0, a NUL
# anywhere in the source, or any error raised by the parser itself.
def self._parse_document_with_mode(src, lite, ignore_order = false)
  # Retag/transcode on a copy so the caller's String keeps its encoding
  # (force_encoding on the argument itself would mutate it in place).
  if src.encoding == Encoding::BINARY
    src = src.dup.force_encoding(Encoding::UTF_8)
  elsif src.encoding != Encoding::UTF_8
    src = src.encode(Encoding::UTF_8)
  end

  # SPEC §"UTF-8 only, NFC-normalized": DMS source is plain UTF-8 with no
  # byte-order mark. A leading U+FEFF is not silently consumed — reject it
  # explicitly so encoding mistakes surface loudly. (BOMs *inside*
  # string/heredoc bodies are fine; this only fires at offset 0.)
  # The BOM is spelled as an escape so the literal cannot be lost or
  # overlooked as an invisible character.
  if src.start_with?("\uFEFF")
    raise DecodeError.new(1, 1, "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8")
  end

  # String#index yields a *character* offset, so the prefix must be taken
  # with a character slice; a byteslice would cut too short (or through a
  # character) whenever multi-byte text precedes the NUL.
  nul = src.index("\0")
  if nul
    prefix = src[0, nul]
    line = 1 + prefix.count("\n")
    last_nl = prefix.rindex("\n")
    col = last_nl ? (nul - last_nl) : (nul + 1)
    raise DecodeError.new(line, col, "U+0000 (NUL) is not allowed in DMS source")
  end

  # NFC-normalize unless ASCII-only (where normalization is a no-op).
  src = src.unicode_normalize(:nfc) unless src.ascii_only?

  parser = new(src, lite: lite, ignore_order: ignore_order)
  meta = parser.parse_front_matter
  body = parser.parse_body
  Document.new(meta, body, parser.comments, parser.original_forms)
end
|
|
960
|
+
|
|
961
|
+
# SPEC §Front-matter-only decode. Decodes the leading `+++ ... +++` block
# and stops — body bytes after the closer are not tokenized, so body-only
# errors (duplicate body keys, unterminated body heredoc, etc.) are not
# surfaced here. Front-matter validation is identical to a full decode:
# open/close on their own lines, `_dms_tier` is type-checked, unknown
# reserved keys are rejected, unterminated front matter is a parse error.
#
# Returns the front-matter Hash (possibly empty) when an opener is
# present, or nil when the document has no front matter at all.
# Always runs in lite mode — no comment AST, no original_forms.
# Raises DecodeError for a BOM at offset 0, a NUL anywhere in `src`, or a
# front-matter parse error. `src` itself is never mutated.
def self.parse_front_matter_only(src)
  # Retag/transcode on a copy so the caller's String keeps its encoding.
  if src.encoding == Encoding::BINARY
    src = src.dup.force_encoding(Encoding::UTF_8)
  elsif src.encoding != Encoding::UTF_8
    src = src.encode(Encoding::UTF_8)
  end

  # The BOM is spelled as an escape so the literal cannot be lost or
  # overlooked as an invisible character.
  if src.start_with?("\uFEFF")
    raise DecodeError.new(1, 1, "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8")
  end

  # The NUL check deliberately scans the whole buffer rather than just the
  # front-matter region: it is cheap, and it reports the same error the
  # full decoder would for the same input.
  # String#index yields a *character* offset, so the prefix is taken with a
  # character slice (a byteslice is wrong once multi-byte text precedes it).
  nul = src.index("\0")
  if nul
    prefix = src[0, nul]
    line = 1 + prefix.count("\n")
    last_nl = prefix.rindex("\n")
    col = last_nl ? (nul - last_nl) : (nul + 1)
    raise DecodeError.new(line, col, "U+0000 (NUL) is not allowed in DMS source")
  end

  # NFC-normalize unless ASCII-only (where normalization is a no-op).
  src = src.unicode_normalize(:nfc) unless src.ascii_only?

  new(src, lite: true, ignore_order: false).parse_front_matter
end
|
|
999
|
+
|
|
1000
|
+
# ---------- Init ----------
|
|
1001
|
+
|
|
1002
|
+
attr_reader :comments, :original_forms
|
|
1003
|
+
|
|
1004
|
+
# Set up a parser over `src`.
#
# NB: BOM-at-file-start rejection happens in _parse_document_with_mode
# before we get here; a leading U+FEFF is never silently stripped.
def initialize(src, lite: false, ignore_order: false)
  # Mode flags.
  # Lite mode skips comment-AST + original_forms bookkeeping; same grammar,
  # same errors. SPEC §Parsing modes — full and lite.
  @lite = lite
  # Unordered mode (SPEC §"Unordered tables"): every body table is built as
  # an UnorderedHash and its keys are shuffled at end-of-build to expose the
  # arbitrary-order contract.
  @ignore_order = ignore_order
  @record_forms = true

  # Cursor state. @pos is a *byte* index into src. For pure-ASCII input it
  # matches the character index; for mixed UTF-8, multi-byte chars only
  # appear inside string bodies / non-ASCII keys, which are sliced with
  # byteslice.
  @src = src
  @len = src.bytesize
  @pos = 0
  @line = 1
  @line_start = 0

  # Accumulators.
  @comments = []
  @pending_leading = []
  @original_forms = []
  @path = []
end
|
|
1028
|
+
|
|
1029
|
+
# Allocate a fresh table: an UnorderedHash when `ignore_order` is set,
# otherwise a plain Hash. Used for body tables, list-item tables, and flow
# tables. Front-matter `meta` is excluded — the front-matter block is
# always insertion-ordered regardless of body mode.
def new_table
  return UnorderedHash.new if @ignore_order

  {}
end
|
|
1036
|
+
|
|
1037
|
+
# Shuffle an UnorderedHash in place at end-of-build so callers cannot rely
# on insertion order. Returns `t` itself. A no-op unless unordered mode is
# on and `t` is an UnorderedHash holding more than one entry.
def finalize_table(t)
  return t unless @ignore_order
  return t unless t.is_a?(UnorderedHash)
  return t if t.size <= 1

  reordered = t.keys.shuffle.map { |key| [key, t[key]] }
  t.clear
  reordered.each { |key, value| t[key] = value }
  t
end
|
|
1047
|
+
|
|
1048
|
+
# ---------- Position primitives ----------
|
|
1049
|
+
|
|
1050
|
+
# Current 1-based character column. On an all-ASCII source the byte
# distance from @line_start is already the character distance; otherwise
# the characters between @line_start and @pos are counted (rare — only
# needed for error messages).
def col
  byte_offset = @pos - @line_start
  chars_before = @src.ascii_only? ? byte_offset : @src.byteslice(@line_start, byte_offset).length
  chars_before + 1
end
|
|
1058
|
+
|
|
1059
|
+
# Build (not raise) a DecodeError located at the current line and column.
def err(msg)
  DecodeError.new(@line, col, msg)
end
|
|
1060
|
+
|
|
1061
|
+
# Build (not raise) a DecodeError for an explicit location. `line_start`
# and `byte_pos` are byte offsets into the source; the reported column is
# the 1-based character column of `byte_pos` within that line.
def err_at(line, line_start, byte_pos, msg)
  byte_offset = byte_pos - line_start
  chars_before = @src.ascii_only? ? byte_offset : @src.byteslice(line_start, byte_offset).length
  DecodeError.new(line, chars_before + 1, msg)
end
|
|
1071
|
+
|
|
1072
|
+
# Return the byte at @pos as an Integer, or nil when @pos is at or past
# the end of the source. Does not move the cursor.
def peek_byte
  @src.getbyte(@pos)
end
|
|
1076
|
+
|
|
1077
|
+
# Return the *character* at @pos as a String, or nil at end of input.
# An ASCII byte is returned directly via Integer#chr; for a non-ASCII lead
# byte the next four bytes are sliced and the first character of that
# slice is returned. Does not move the cursor.
def peek_char_byte_safe
  lead = @src.getbyte(@pos)
  if lead.nil?
    nil
  elsif lead < 128
    lead.chr
  else
    # Multi-byte: four bytes always cover one full UTF-8 character.
    @src.byteslice(@pos, 4).force_encoding(Encoding::UTF_8)[0]
  end
end
|
|
1087
|
+
|
|
1088
|
+
# True when the bytes of `s` (an ASCII string) appear in the source
# starting exactly at @pos. Compared byte-for-byte without allocating.
def starts_bytes?(s)
  width = s.bytesize
  return false if @pos + width > @len

  matched = 0
  matched += 1 while matched < width && @src.getbyte(@pos + matched) == s.getbyte(matched)
  matched == width
end
|
|
1099
|
+
|
|
1100
|
+
# True once the cursor has reached (or passed) the end of the source.
def eof?
  @pos >= @len
end
|
|
1101
|
+
|
|
1102
|
+
# Record that a line terminator was just consumed: the new line begins at
# the current @pos and the line counter moves forward by one.
def advance_line
  @line = @line + 1
  @line_start = @pos
end
|
|
1106
|
+
|
|
1107
|
+
# ---------- Whitespace / EOL ----------
|
|
1108
|
+
|
|
1109
|
+
# Advance @pos past any run of spaces and tabs. Never crosses a line
# terminator.
def skip_inline_ws
  cursor = @pos
  while cursor < @len
    byte = @src.getbyte(cursor)
    break if byte != SP && byte != TAB

    cursor += 1
  end
  @pos = cursor
end
|
|
1120
|
+
|
|
1121
|
+
# Consume one line terminator (LF or CRLF) at @pos if there is one.
# Returns true and updates the line bookkeeping when a terminator was
# consumed; returns false and leaves the cursor alone otherwise (a lone CR
# is not a terminator).
def consume_eol
  lead = @src.getbyte(@pos)
  width =
    if lead == LF
      1
    elsif lead == CR && @src.getbyte(@pos + 1) == LF
      2
    end
  return false unless width

  @pos += width
  advance_line
  true
end
|
|
1135
|
+
|
|
1136
|
+
# Skip blank lines and comments, queueing each comment on @pending_leading
# (unless in lite mode). On return @pos sits at the point where the last
# scan attempt began — i.e. *before* any inline whitespace that precedes
# the first byte that is not trivia — or at that same point when the input
# ran out. A blank line flushes the queued comments as floating.
# Raises DecodeError on a CR that is not followed by LF.
def skip_trivia
  loop do
    resume_at = @pos
    skip_inline_ws
    case @src.getbyte(@pos)
    when LF
      flush_pending_as_floating
      @pos += 1
      advance_line
    when CR
      raise err("bare CR is not a valid line terminator") if @src.getbyte(@pos + 1) != LF

      flush_pending_as_floating
      @pos += 2
      advance_line
    when HASH
      if starts_bytes?("###")
        text = read_hash_block_comment
        @pending_leading << Comment.new(text, :block) unless @lite
      else
        text = read_line_comment_to_eol
        consume_eol
        @pending_leading << Comment.new(text, :line) unless @lite
      end
    when SLASH
      case @src.getbyte(@pos + 1)
      when SLASH
        text = read_line_comment_to_eol
        consume_eol
        @pending_leading << Comment.new(text, :line) unless @lite
      when STAR
        text = read_c_block_comment
        @pending_leading << Comment.new(text, :block) unless @lite
      else
        # A lone '/' is content, not a comment opener.
        @pos = resume_at
        return
      end
    else
      # End of input (nil) or the first byte of real content.
      @pos = resume_at
      return
    end
  end
end
|
|
1181
|
+
|
|
1182
|
+
# ---------- Pending leading flushers ----------
|
|
1183
|
+
|
|
1184
|
+
# Move every queued leading comment into @comments as a :floating comment
# anchored at a frozen snapshot of the current @path, and empty the queue.
def flush_pending_as_floating
  return if @pending_leading.empty?

  batch, @pending_leading = @pending_leading, []
  anchor = @path.dup.freeze
  batch.each { |comment| @comments << AttachedComment.new(comment, :floating, anchor) }
end
|
|
1193
|
+
|
|
1194
|
+
# Move every queued leading comment into @comments as a :leading comment
# anchored at a frozen snapshot of the current @path, and empty the queue.
def flush_pending_as_leading_on_current
  return if @pending_leading.empty?

  batch, @pending_leading = @pending_leading, []
  anchor = @path.dup.freeze
  batch.each { |comment| @comments << AttachedComment.new(comment, :leading, anchor) }
end
|
|
1203
|
+
|
|
1204
|
+
# ---------- Raw comment readers ----------
|
|
1205
|
+
|
|
1206
|
+
# Read from @pos up to (not including) the next LF or CR, or to end of
# input. Leaves @pos on the terminator and returns the consumed text as a
# UTF-8 String. The caller is responsible for consuming the terminator.
def read_line_comment_to_eol
  first = @pos
  stop = first
  while stop < @len
    byte = @src.getbyte(stop)
    break if byte == LF || byte == CR

    stop += 1
  end
  @pos = stop
  @src.byteslice(first, stop - first).force_encoding(Encoding::UTF_8)
end
|
|
1219
|
+
|
|
1220
|
+
# Read a `/* ... */` comment starting at @pos, honouring nested `/* */`
# pairs and keeping line bookkeeping up to date across LF / CRLF.
# Returns the raw comment text (delimiters included) as a UTF-8 String and
# leaves @pos just past the closing `*/`.
# Raises DecodeError, located at the opener, if input ends first.
def read_c_block_comment
  open_line = @line
  open_line_start = @line_start
  open_pos = @pos
  @pos += 2 # step over the opening "/*"
  depth = 1
  until depth.zero?
    if @pos >= @len
      raise err_at(open_line, open_line_start, open_pos, "unterminated /* block comment")
    end

    byte = @src.getbyte(@pos)
    following = @src.getbyte(@pos + 1)
    if byte == SLASH && following == STAR
      @pos += 2
      depth += 1
    elsif byte == STAR && following == SLASH
      @pos += 2
      depth -= 1
    elsif byte == LF
      @pos += 1
      advance_line
    elsif byte == CR && following == LF
      @pos += 2
      advance_line
    else
      @pos += 1
    end
  end
  @src.byteslice(open_pos, @pos - open_pos).force_encoding(Encoding::UTF_8)
end
|
|
1245
|
+
|
|
1246
|
+
# Read a `###` block comment starting at @pos.
#
# The opener is `###` optionally followed by a label (bytes accepted by
# LABEL_CONT_BYTE, first byte a letter or underscore) and must be alone on
# its line. The comment runs until a line whose stripped text equals the
# label, or `###` when there is no label.
#
# Returns the raw text from the opener through the end of the terminator
# line (line terminator excluded) as a UTF-8 String; @pos ends up past the
# terminator line's EOL.
# Raises DecodeError for a bad label, trailing content after the opener,
# a missing terminator, or a lone CR inside the comment body.
def read_hash_block_comment
  sl = @line; sls = @line_start; sp = @pos
  @pos += 3
  ls = @pos
  s = @src
  n = @len
  while @pos < n
    b = s.getbyte(@pos)
    break unless LABEL_CONT_BYTE[b]
    @pos += 1
  end
  label = s.byteslice(ls, @pos - ls).force_encoding(Encoding::UTF_8)
  if !label.empty?
    first = label.getbyte(0)
    unless first == UNDERSCORE || (first >= LOWER_A && first <= LOWER_Z) || (first >= UPPER_A && first <= UPPER_Z)
      raise err_at(sl, sls, sp, "block comment label must start with a letter or underscore")
    end
  end
  terminator = label.empty? ? "###" : label
  skip_inline_ws
  unless consume_eol || eof?
    raise err("block comment opener must be on its own line")
  end
  loop do
    if eof?
      raise err_at(sl, sls, sp, "unterminated ### block comment")
    end
    line_begin = @pos
    while @pos < n
      b = s.getbyte(@pos)
      break if b == LF || b == CR
      @pos += 1
    end
    line_text = s.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    line_end = @pos
    ended_line = consume_eol
    if line_text.strip == terminator
      return s.byteslice(sp, line_end - sp).force_encoding(Encoding::UTF_8)
    end
    # consume_eol declines only at end of input or on a CR that is not
    # followed by LF. In the latter case @pos would stay on the CR and this
    # loop would spin forever, so reject it the same way skip_trivia does.
    unless ended_line || eof?
      raise err("bare CR is not a valid line terminator")
    end
  end
end
|
|
1287
|
+
|
|
1288
|
+
# ---------- Document entry ----------
|
|
1289
|
+
|
|
1290
|
+
# Parse an optional leading `+++ ... +++` front-matter block.
#
# Returns nil (with the cursor and comment state restored) when the
# document does not open with `+++`. Otherwise the lines between opener
# and closer are collected, parsed by a sub-parser as a table, and
# returned as a plain Hash with reserved (`_`-prefixed) keys validated
# and removed. Comments and original forms recorded by the sub-parser are
# re-rooted under "__fm__"; comments attached to reserved keys become
# floating, and original forms of reserved keys are dropped.
#
# Raises DecodeError for trailing content on the opener line, a missing
# closer, a lone CR inside the block, an invalid `_dms_tier`, or an
# unknown reserved key.
def parse_front_matter
  save_pos = @pos; save_line = @line; save_lstart = @line_start
  save_pending = @pending_leading.length
  save_comments = @comments.length
  skip_trivia
  unless starts_bytes?("+++")
    @pos = save_pos; @line = save_line; @line_start = save_lstart
    @pending_leading.slice!(save_pending..)
    @comments.slice!(save_comments..)
    return nil
  end
  # Any trailing content on the opener line is a parse error
  # (SPEC §Front matter: "each `+++` must appear on its own line,
  # with no trailing content"). Advance past `+++` and let the
  # strict EOL check below diagnose.
  opener_line = @line; opener_lstart = @line_start; opener_pos = @pos
  @pos += 3
  skip_inline_ws
  unless consume_eol || eof?
    raise err("front matter opener must be on its own line")
  end
  inner_buf = +""
  inner_buf.force_encoding(Encoding::UTF_8)
  loop do
    if eof?
      raise err_at(opener_line, opener_lstart, opener_pos,
                   "unterminated front matter: missing closing '+++'")
    end
    line_begin = @pos
    while @pos < @len
      b = @src.getbyte(@pos)
      break if b == LF || b == CR
      @pos += 1
    end
    line_text = @src.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    if line_text.strip == "+++"
      consume_eol
      break
    end
    inner_buf << line_text
    if consume_eol
      inner_buf << "\n"
    elsif !eof?
      # consume_eol declines only at end of input or on a CR that is not
      # followed by LF. Left alone, @pos would stay on that CR and this
      # loop would never terminate.
      raise err("bare CR is not a valid line terminator")
    end
  end
  sub = self.class.new(inner_buf, lite: @lite)
  table = sub.parse_body_as_table
  meta = {}
  # Front-matter validation errors all point at the opener.
  fm_err = ->(msg) { err_at(opener_line, opener_lstart, opener_pos, msg) }
  table.each do |k, v|
    if k.start_with?("_")
      if k == "_dms_tier"
        # true/false are not Integers in Ruby, so one check is enough.
        unless v.is_a?(Integer)
          raise fm_err.call("_dms_tier must be a non-negative integer")
        end
        raise fm_err.call("_dms_tier must be non-negative") if v < 0
        if v >= 2
          raise fm_err.call("_dms_tier: #{v} is not supported (only tier 0 and 1 are defined)")
        end
        if v == 1
          raise fm_err.call("_dms_tier: 1 requires tier-1 decode mode (use --tier=1)")
        end
      else
        raise fm_err.call("unknown reserved key: #{k}")
      end
    else
      meta[k] = v
    end
  end
  sub.comments.each do |ac|
    attached_to_reserved = !ac.path.empty? && ac.path[0].is_a?(String) && ac.path[0].start_with?("_")
    if attached_to_reserved
      @comments << AttachedComment.new(ac.comment, :floating, ["__fm__"].freeze)
      next
    end
    @comments << AttachedComment.new(ac.comment, ac.position, (["__fm__"] + ac.path).freeze)
  end
  sub.original_forms.each do |path, lit|
    next if !path.empty? && path[0].is_a?(String) && path[0].start_with?("_")
    @original_forms << [(["__fm__"] + path).freeze, lit]
  end
  meta
end
|
|
1370
|
+
|
|
1371
|
+
# Parse the whole source as a single root table (used for the text
# collected between front-matter delimiters). Empty input yields an empty
# table. Raises DecodeError when the content is indented, starts with a
# reserved sigil, is a list, is not a key/value line, or is followed by
# anything after the table.
def parse_body_as_table
  skip_trivia
  if eof?
    flush_pending_as_floating
    return new_table
  end

  lead = @src.getbyte(@pos)
  raise err("unexpected indentation inside front matter") if lead == SP || lead == TAB

  reject_reserved_sigil_at_line_start!
  raise err("front matter block cannot have a list root") if lead == PLUS && peek_after_plus_is_space_or_eol?
  raise err("front matter block must be a table") unless line_starts_kvpair?

  table = parse_table_block(0)
  skip_trivia
  raise err("trailing content inside front matter") unless eof?

  table
end
|
|
1393
|
+
|
|
1394
|
+
# Parse the document body and return its root value: a list when the
# first line is a `+` item, a table when it is a key/value line, otherwise
# a single scalar. Empty input yields an empty table. Raises DecodeError
# when the root is indented, starts with a reserved sigil, or is followed
# by further content.
def parse_body
  skip_trivia
  if eof?
    flush_pending_as_floating
    return new_table
  end

  lead = @src.getbyte(@pos)
  raise err("unexpected indentation at document root") if lead == SP || lead == TAB

  reject_reserved_sigil_at_line_start!
  root, trailing_message =
    if lead == PLUS && peek_after_plus_is_space_or_eol?
      [parse_list_block(0), "trailing content after list root"]
    elsif line_starts_kvpair?
      [parse_table_block(0), "trailing content after table root"]
    else
      scalar = parse_inline_value_or_heredoc
      consume_after_value(true)
      [scalar, "scalar root cannot be followed by more content"]
    end

  skip_trivia
  raise err(trailing_message) unless eof?

  flush_pending_as_floating
  root
end
|
|
1424
|
+
|
|
1425
|
+
# True when the byte right after @pos is a space, tab, LF, CR, or the end
# of input — i.e. a `+` at @pos can be a list marker.
def peek_after_plus_is_space_or_eol?
  case @src.getbyte(@pos + 1)
  when nil, SP, TAB, LF, CR then true
  else false
  end
end
|
|
1429
|
+
|
|
1430
|
+
# SPEC tier-0: reject reserved decorator sigils at line-start position.
# The caller has already consumed leading whitespace + trivia, so @pos
# sits on the first non-whitespace byte of a body line. If that byte is
# one of the 17 reserved sigils (! @ $ % ^ & * | ~ ` . , > < ? ; =), raise
# DecodeError. The check is only valid here — string bodies, comments, and
# heredoc bodies are parsed by their own readers and never reach this
# dispatch. Returns nil otherwise, including at end of input.
def reject_reserved_sigil_at_line_start!
  lead = @pos < @len ? @src.getbyte(@pos) : nil
  return if lead.nil? || !RESERVED_SIGIL_BYTE[lead]

  raise err("reserved decorator sigil '#{lead.chr}' at line start is not allowed")
end
|
|
1442
|
+
|
|
1443
|
+
# Lookahead (the cursor is not moved): does the text at @pos look like
# `key:` followed by whitespace, a line terminator, or end of input?
# The key may be double-quoted (backslash escapes are skipped), single-
# quoted, or bare (ASCII bytes accepted by BARE_KEY_BYTE plus non-ASCII
# characters accepted by xid_continue?). A quoted key that hits a line
# terminator before its closing quote, or an empty bare key, is not a
# key/value line.
def line_starts_kvpair?
  src = @src
  limit = @len
  cursor = @pos
  case src.getbyte(cursor)
  when DQUOTE
    cursor += 1
    while cursor < limit
      byte = src.getbyte(cursor)
      return false if byte == LF || byte == CR

      if byte == BACKSLASH
        cursor += 2 # skip the escape introducer and the escaped byte
        next
      end
      cursor += 1
      break if byte == DQUOTE
    end
  when SQUOTE
    cursor += 1
    while cursor < limit
      byte = src.getbyte(cursor)
      return false if byte == LF || byte == CR

      cursor += 1
      break if byte == SQUOTE
    end
  else
    key_start = cursor
    while cursor < limit
      byte = src.getbyte(cursor)
      if byte < 128
        break unless BARE_KEY_BYTE[byte]

        cursor += 1
      else
        # Non-ASCII byte: walk a full UTF-8 char and XID_Continue-test it.
        width = utf8_char_len(byte)
        char = src.byteslice(cursor, width).force_encoding(Encoding::UTF_8)
        break unless xid_continue?(char.ord)

        cursor += width
      end
    end
    return false if cursor == key_start
  end
  return false if cursor >= limit || src.getbyte(cursor) != COLON

  case src.getbyte(cursor + 1)
  when nil, SP, TAB, LF, CR then true
  else false
  end
end
|
|
1499
|
+
|
|
1500
|
+
# Number of bytes to take for the character whose first byte is `b`:
# 1 below 0x80, 2 below 0xE0 (this includes the 0x80-0xBF range), 3 below
# 0xF0, otherwise 4.
def utf8_char_len(b)
  if b < 0x80
    1
  elsif b < 0xE0
    2
  elsif b < 0xF0
    3
  else
    4
  end
end
|
|
1507
|
+
|
|
1508
|
+
# Frozen XID_Continue test (Unicode 15.1, UAX #31 §2). Code points below
# 0x80 always answer false: the ASCII part of the bare-key alphabet is
# decided by BARE_KEY_BYTE before this is called. Everything else is a
# binary search over XID_CONTINUE_RANGES, a sorted list of non-overlapping
# [first, last] pairs.
def xid_continue?(cp)
  return false if cp < 0x80

  hit = XID_CONTINUE_RANGES.bsearch do |range|
    if cp < range[0]
      -1 # target lies in an earlier range
    elsif cp > range[1]
      1 # target lies in a later range
    else
      0
    end
  end
  !hit.nil?
end
|
|
1531
|
+
|
|
1532
|
+
# ---------- Block parsers ----------
|
|
1533
|
+
|
|
1534
|
+
# Count the space characters at the start of the current line (from
# @line_start). Tabs are not counted and end the run.
def measure_line_indent
  cursor = @line_start
  cursor += 1 while cursor < @len && @src.getbyte(cursor) == SP
  cursor - @line_start
end
|
|
1544
|
+
|
|
1545
|
+
# Parse consecutive key/value lines that sit at exactly `indent` spaces
# into one table. Stops at end of input or at the first line indented less
# than `indent`. Raises DecodeError for a deeper-indented line or a
# duplicate key. Returns the table after finalize_table.
def parse_table_block(indent)
  table = new_table
  loop do
    skip_trivia
    break if @pos >= @len

    # Indent is measured inline rather than via measure_line_indent.
    cursor = @line_start
    cursor += 1 while cursor < @len && @src.getbyte(cursor) == SP
    line_indent = cursor - @line_start
    break if line_indent < indent

    unless line_indent == indent
      raise err_at(@line, @line_start, @line_start + indent,
                   "inconsistent indent: expected #{indent} spaces, got #{line_indent}")
    end

    @pos = @line_start + indent
    reject_reserved_sigil_at_line_start!
    key, value = parse_kvpair(indent)
    raise err("duplicate key: #{key}") if table.key?(key)

    table[key] = value
  end
  flush_pending_as_floating
  finalize_table(table)
end
|
|
1571
|
+
|
|
1572
|
+
# Parse consecutive `+` items that sit at exactly `indent` spaces into an
# Array. Stops at end of input, at a line indented less than `indent`, or
# at a line that does not start with `+`. While an item is parsed its
# index is on @path (always popped again, even on error).
# Raises DecodeError for a deeper-indented line, a `+` not followed by
# whitespace/EOL, or an empty marker without a deeper-indented block.
def parse_list_block(indent)
  items = []
  loop do
    skip_trivia
    break if @pos >= @len

    line_indent = measure_line_indent
    break if line_indent < indent

    unless line_indent == indent
      raise err_at(@line, @line_start, @line_start + indent,
                   "inconsistent indent: expected #{indent} spaces, got #{line_indent}")
    end

    @pos = @line_start + indent
    reject_reserved_sigil_at_line_start!
    break unless @src.getbyte(@pos) == PLUS

    @path.push(items.length)
    flush_pending_as_leading_on_current unless @pending_leading.empty?
    begin
      @pos += 1 # consume '+'
      after_marker = @src.getbyte(@pos)
      value_on_marker_line = false
      if after_marker == SP || after_marker == TAB
        @pos += 1
        skip_inline_ws
        capture_inner_block_comments
        first = @src.getbyte(@pos)
        value_on_marker_line = !(first.nil? || first == LF || first == CR)
      elsif !(after_marker.nil? || after_marker == LF || after_marker == CR)
        raise err("expected space after '+'")
      end

      item =
        if value_on_marker_line
          parse_list_item_value(indent)
        else
          # Empty marker: the value is the deeper-indented block below it.
          consume_eol
          skip_trivia
          raise err("expected indented block after empty '+' marker") if @pos >= @len

          inner_indent = measure_line_indent
          raise err("expected indented block after empty '+' marker") if inner_indent <= indent

          parse_block_value(inner_indent)
        end
    ensure
      @path.pop
    end
    items << item
  end
  flush_pending_as_floating
  items
end
|
|
1626
|
+
|
|
1627
|
+
# Parse the nested block whose first line is the current line, indented by
# `indent` spaces: a list when that line starts with a `+` marker,
# otherwise a table.
def parse_block_value(indent)
  @pos = @line_start + indent
  list_marker = @src.getbyte(@pos) == PLUS && peek_after_plus_is_space_or_eol?
  list_marker ? parse_list_block(indent) : parse_table_block(indent)
end
|
|
1634
|
+
|
|
1635
|
+
# Parse the value that follows `+ ` on a list-item line. When the text at
# the cursor is a key/value pair the item is a table: the first key fixes
# the column and every following line at that same column contributes a
# sibling key. Otherwise the item is a single inline value or heredoc.
# Raises DecodeError for a deeper-indented sibling line, a `+` at the
# sibling column, or a duplicate key.
def parse_list_item_value(list_indent)
  unless line_starts_kvpair?
    scalar = parse_inline_value_or_heredoc
    consume_after_value(false)
    return scalar
  end

  key_col = col - 1
  first_key, first_value = parse_kvpair(key_col)
  table = new_table
  table[first_key] = first_value
  loop do
    skip_trivia
    break if @pos >= @len

    line_indent = measure_line_indent
    break if line_indent < key_col

    unless line_indent == key_col
      raise err_at(@line, @line_start, @line_start + key_col,
                   "list-item table sibling key must align with first key")
    end

    @pos = @line_start + key_col
    reject_reserved_sigil_at_line_start!
    raise err("'+' marker at sibling-key column is ambiguous") if @src.getbyte(@pos) == PLUS
    break unless line_starts_kvpair?

    key, value = parse_kvpair(key_col)
    raise err("duplicate key: #{key}") if table.key?(key)

    table[key] = value
  end
  flush_pending_as_floating
  finalize_table(table)
end
|
|
1667
|
+
|
|
1668
|
+
# ---------- kvpair ----------
|
|
1669
|
+
|
|
1670
|
+
# Parse one `key: value` entry starting at @pos and return [key, value].
#
# parent_indent - indent (in spaces) of the line holding the key; a child
#                 block must be indented strictly deeper than this.
#
# The key is on @path while its value is parsed and is popped before each
# normal return (not when an error is raised). The value is either an
# inline value/heredoc on the same line, or — when nothing but optional
# whitespace and `/* */` comments follows the colon — the indented block
# on the following lines.
# Raises DecodeError when the colon is missing, when the colon is followed
# by something other than whitespace/EOL, or when an expected child block
# is absent or not indented deeper than `parent_indent`.
def parse_kvpair(parent_indent)
  # Inlined parse_key fast path for bare ASCII keys (common hot-loop case).
  # Only takes the fast path when the *next* byte after the key run is
  # ASCII too — otherwise the key may include trailing unicode chars
  # the slow path needs to consume.
  s = @src
  n = @len
  start = @pos
  b0 = s.getbyte(start)
  took_fast = false
  if b0 && b0 < 128 && BARE_KEY_BYTE[b0]
    p = start + 1
    while p < n
      bb = s.getbyte(p)
      break unless bb && bb < 128 && BARE_KEY_BYTE[bb]
      p += 1
    end
    # Only commit fast path if next byte is ASCII (i.e. truly key end).
    nb = (p < n) ? s.getbyte(p) : nil
    if nb.nil? || nb < 128
      @pos = p
      key = s.byteslice(start, p - start).force_encoding(Encoding::UTF_8)
      took_fast = true
    end
  end
  # Slow path: quoted keys and bare keys containing non-ASCII characters.
  key = parse_key unless took_fast
  raise err("expected ':' after key") if @src.getbyte(@pos) != COLON
  @path.push(key)
  # Comments queued above this line become leading comments of this key.
  flush_pending_as_leading_on_current unless @pending_leading.empty?
  @pos += 1 # consume ':'
  b = @src.getbyte(@pos)
  if b == SP || b == TAB
    @pos += 1
    skip_inline_ws
    # Only enter the comment-capture loop if we see '/' (cheap byte check).
    capture_inner_block_comments if @src.getbyte(@pos) == SLASH
    nb = @src.getbyte(@pos)
    if nb.nil? || nb == LF || nb == CR
      # Nothing left on the line: the value is the indented block below.
      consume_eol
      skip_trivia
      raise err("expected indented child block") if @pos >= @len
      child_indent = measure_line_indent
      raise err("expected indented child block") if child_indent <= parent_indent
      v = parse_block_value(child_indent)
      @path.pop
      return [key, v]
    end
    v = parse_inline_value_or_heredoc
    # Fast path peek: consume optional inline ws, then if next byte is
    # LF we just jump past it. Anything else (including comments) falls
    # back to the full consume_after_value, which needs to see the ws.
    s2 = @src
    p2 = @pos
    while (bb = s2.getbyte(p2)) == SP || bb == TAB
      p2 += 1
    end
    if bb == LF
      @pos = p2 + 1
      advance_line
      @path.pop
      return [key, v]
    end
    if bb.nil?
      # Value ran to end of input (possibly after trailing whitespace).
      @pos = p2
      @path.pop
      return [key, v]
    end
    # leave @pos before the ws so trailing-comment whitespace check sees it
    consume_after_value(false)
    @path.pop
    return [key, v]
  end
  if b.nil? || b == LF || b == CR
    # Colon directly followed by EOL / end of input: value is a child block.
    consume_eol
    skip_trivia
    raise err("expected indented child block") if @pos >= @len
    child_indent = measure_line_indent
    raise err("expected indented child block") if child_indent <= parent_indent
    v = parse_block_value(child_indent)
    @path.pop
    return [key, v]
  end
  raise err("expected whitespace after ':'")
end
|
|
1754
|
+
|
|
1755
|
+
# ---------- Keys ----------
|
|
1756
|
+
|
|
1757
|
+
# Parse a table key at @pos and return it as a String. A key opening with
# `"` or `'` is read by the matching string-value reader with
# @record_forms switched off for the duration (restored afterwards, even
# on error); anything else is read as a bare key.
# Raises DecodeError for a triple-quoted key or when input has ended.
def parse_key
  lead = @src.getbyte(@pos)
  if lead == DQUOTE || lead == SQUOTE
    basic = lead == DQUOTE
    triple = basic ? '"""' : "'''"
    raise err("triple-quoted strings are not allowed as keys") if starts_bytes?(triple)

    previous = @record_forms
    @record_forms = false
    begin
      return basic ? parse_basic_string_value : parse_literal_string_value
    ensure
      @record_forms = previous
    end
  end
  raise err("expected key") if lead.nil?

  parse_bare_key
end
|
|
1782
|
+
|
|
1783
|
+
# Scans an unquoted key starting at @pos and returns it as a UTF-8 String.
#
# ASCII bytes are accepted through the BARE_KEY_BYTE lookup table; a
# multi-byte character is decoded and accepted when xid_continue? approves
# its codepoint. On success @pos is moved past the key.
#
# Raises the error built by `err` ("expected key") when nothing was consumed.
def parse_bare_key
  src = @src
  limit = @len
  first = @pos
  cur = first
  while cur < limit
    byte = src.getbyte(cur)
    if byte >= 128
      # Non-ASCII lead byte: decode the whole character before classifying it.
      width = utf8_char_len(byte)
      char = src.byteslice(cur, width).force_encoding(Encoding::UTF_8)
      break unless xid_continue?(char.ord)
      cur += width
    elsif BARE_KEY_BYTE[byte]
      cur += 1
    else
      break
    end
  end
  raise err("expected key") if cur == first
  @pos = cur
  src.byteslice(first, cur - first).force_encoding(Encoding::UTF_8)
end
|
|
1806
|
+
|
|
1807
|
+
# ---------- Value dispatch ----------
|
|
1808
|
+
|
|
1809
|
+
# Consumes every consecutive `/* ... */` comment that starts at @pos.
#
# Each comment is read with read_c_block_comment and, unless @lite is set,
# appended to @comments as an :inner AttachedComment tagged with a frozen
# copy of the current @path. Inline whitespace after each comment is skipped.
def capture_inner_block_comments
  while @src.getbyte(@pos) == SLASH && @src.getbyte(@pos + 1) == STAR
    text = read_c_block_comment
    unless @lite
      @comments << AttachedComment.new(Comment.new(text, :block), :inner, @path.dup.freeze)
    end
    skip_inline_ws
  end
end
|
|
1820
|
+
|
|
1821
|
+
# Parses the value that starts at @pos and returns it.
#
# Dispatch is on the first byte:
#   digit        -> inline fast path for plain decimal integers, otherwise
#                   parse_number_or_datetime
#   '"' / "'"    -> heredoc when tripled, otherwise a basic/literal string
#   '[' / '{'    -> flow array / flow table
#   't' / 'f'    -> boolean;  'i' -> inf;  'n' -> nan
#   '+' / '-'    -> parse_number_or_datetime
#
# Raises the error built by `err` when the input is exhausted or the first
# byte cannot start a value. The offending character is reported in full,
# including when it is a multi-byte UTF-8 character.
def parse_inline_value_or_heredoc
  b = @src.getbyte(@pos)
  # Fast path: plain decimal integer. Recognize "[0-9]+ <terminator>" inline
  # and skip parse_number_or_datetime's lookahead and scanner setup.
  if b && b >= DIGIT0 && b <= DIGIT9
    s = @src
    n = @len
    start = @pos
    p = start + 1
    while p < n
      bb = s.getbyte(p)
      break unless bb >= DIGIT0 && bb <= DIGIT9
      p += 1
    end
    # Only take the fast path when the token is at most 18 digits (always
    # within i64) and is directly followed by a value terminator or EOF;
    # dates, times, floats and underscored numbers fail that test.
    len = p - start
    if len <= 18 && (p >= n || VALUE_TERMINATOR_BYTE[s.getbyte(p)])
      # Multi-digit tokens with a leading zero ("012") go to the slow path,
      # which rejects them with a proper error.
      unless s.getbyte(start) == DIGIT0 && len > 1
        @pos = p
        return s.byteslice(start, len).to_i
      end
    end
  end
  case b
  when DQUOTE
    return parse_heredoc_basic if starts_bytes?('"""')
    return parse_basic_string_value
  when SQUOTE
    return parse_heredoc_literal if starts_bytes?("'''")
    v = parse_literal_string_value
    record_form(OriginalLiteral.string(StringForm.literal))
    return v
  when LBRACK
    return parse_flow_array
  when LBRACE
    return parse_flow_table
  when LOWER_T, LOWER_F_LETTER
    return parse_bool_value
  when 0x69 # 'i'
    return parse_inf_value
  when LOWER_N
    return parse_nan_value
  end
  if b && (b == PLUS || b == MINUS || (b >= DIGIT0 && b <= DIGIT9))
    return parse_number_or_datetime
  end
  raise err("expected value") if b.nil?
  # Decode the whole character rather than a single byte so that non-ASCII
  # input is shown intact in the message (a UTF-8 character is <= 4 bytes).
  bad = @src.byteslice(@pos, 4).force_encoding(Encoding::UTF_8)[0]
  raise err("unexpected character '#{bad}' in value")
end
|
|
1874
|
+
|
|
1875
|
+
# Parses the keyword `true` or `false` at @pos.
#
# The keyword must be followed by end of input or a byte flagged in
# VALUE_TERMINATOR_BYTE. On success @pos is moved past the keyword and the
# matching Ruby boolean is returned; otherwise the error built by `err`
# ("expected value") is raised.
def parse_bool_value
  src = @src
  at = @pos
  [["true", true], ["false", false]].each do |word, value|
    size = word.bytesize
    next unless src.byteslice(at, size) == word

    follow = src.getbyte(at + size)
    next unless follow.nil? || VALUE_TERMINATOR_BYTE[follow]

    @pos += size
    return value
  end
  raise err("expected value")
end
|
|
1894
|
+
|
|
1895
|
+
# Parses the unsigned keyword `inf` at @pos and returns Float::INFINITY.
#
# The keyword must be followed by end of input or a byte flagged in
# VALUE_TERMINATOR_BYTE; otherwise the error built by `err` is raised.
def parse_inf_value
  raise err("expected 'inf'") unless @src.byteslice(@pos, 3) == "inf"

  follow = @src.getbyte(@pos + 3)
  raise err("expected 'inf'") unless follow.nil? || VALUE_TERMINATOR_BYTE[follow]

  @pos += 3
  Float::INFINITY
end
|
|
1905
|
+
|
|
1906
|
+
# Parses the keyword `nan` at @pos and returns Float::NAN.
#
# The keyword must be followed by end of input or a byte flagged in
# VALUE_TERMINATOR_BYTE; otherwise the error built by `err` is raised.
def parse_nan_value
  raise err("expected 'nan'") unless @src.byteslice(@pos, 3) == "nan"

  follow = @src.getbyte(@pos + 3)
  raise err("expected 'nan'") unless follow.nil? || VALUE_TERMINATOR_BYTE[follow]

  @pos += 3
  Float::NAN
end
|
|
1916
|
+
|
|
1917
|
+
# ---------- Numbers & datetimes ----------
|
|
1918
|
+
|
|
1919
|
+
# Reports whether the ten bytes starting at byte offset `p` have the shape
# DDDD-DD-DD (digits with '-' at offsets 4 and 7). Only the shape is
# checked here, not whether the date is a real calendar date.
def looks_like_date_prefix_at?(p)
  return false if p + 10 > @len

  src = @src
  offset = 0
  while offset < 10
    byte = src.getbyte(p + offset)
    matched = offset == 4 || offset == 7 ? byte == MINUS : DIGIT_BYTE[byte]
    return false unless matched

    offset += 1
  end
  true
end
|
|
1934
|
+
|
|
1935
|
+
# Reports whether the eight bytes starting at byte offset `p` have the shape
# DD:DD:DD (digits with ':' at offsets 2 and 5). Only the shape is checked
# here, not whether the fields are in range.
def looks_like_time_prefix_at?(p)
  return false if p + 8 > @len

  src = @src
  offset = 0
  while offset < 8
    byte = src.getbyte(p + offset)
    matched = offset == 2 || offset == 5 ? byte == 0x3A : DIGIT_BYTE[byte] # 0x3A is ':'
    return false unless matched

    offset += 1
  end
  true
end
|
|
1948
|
+
|
|
1949
|
+
# Parses a numeric or date/time value that starts at @pos.
#
# Unsigned input shaped like a date or a time is handed to
# parse_datetime_value / parse_local_time_value. Signed `inf` is handled
# here. Everything else is tokenized by scan_number_token and converted by
# parse_float_lit or parse_integer_lit; conversion failures are re-raised
# through `err`.
#
# When @record_forms is set, an integer whose spelling differs from its
# canonical `to_s` form is remembered in @original_forms under a frozen
# copy of the current @path.
def parse_number_or_datetime
  src = @src
  begin_at = @pos
  lead = src.getbyte(begin_at)

  if lead == PLUS || lead == MINUS
    if src.byteslice(begin_at + 1, 3) == "inf"
      follow = src.getbyte(begin_at + 4)
      if follow.nil? || VALUE_TERMINATOR_BYTE[follow]
        @pos += 4
        return lead == MINUS ? -Float::INFINITY : Float::INFINITY
      end
    end
  else
    return parse_datetime_value if looks_like_date_prefix_at?(begin_at)
    return parse_local_time_value if looks_like_time_prefix_at?(begin_at)
  end

  width, float_token = scan_number_token
  lexeme = src.byteslice(begin_at, width).force_encoding(Encoding::UTF_8)

  if float_token
    value =
      begin
        parse_float_lit(lexeme)
      rescue StandardError => e
        raise err("invalid float: #{lexeme} (#{e.message})")
      end
    @pos += width
    return value
  end

  number =
    begin
      parse_integer_lit(lexeme)
    rescue StandardError => e
      raise err(e.message)
    end
  @pos += width

  # Cheap pre-test before paying for number.to_s: only lexemes containing
  # '_', starting with '+', or with a '0' as first digit followed by more
  # bytes can differ from the canonical decimal spelling.
  if @record_forms
    size = lexeme.bytesize
    head = lexeme.getbyte(0)
    suspect =
      lexeme.include?("_") ||
      head == PLUS ||
      (head == DIGIT0 && size > 1) ||
      (head == MINUS && size > 1 && lexeme.getbyte(1) == DIGIT0)
    if suspect && lexeme != number.to_s
      @original_forms << [@path.dup.freeze, OriginalLiteral.integer(lexeme)]
    end
  end
  number
end
|
|
2005
|
+
|
|
2006
|
+
# Measures the numeric token that starts at @pos without consuming it.
#
# Returns [length_in_bytes, is_float]. The scan is deliberately permissive:
# it only decides where the token ends and whether it is a float; digit
# validity for the radix, underscore placement and exponent contents are
# checked later by the literal parsers.
#
# Prefixed tokens (0x / 0o / 0b after an optional sign) accept hex digits
# and '_', at most one '.', and one 'p' exponent marker with an optional
# sign; they are floats when a '.' or 'p' was seen. Decimal tokens accept
# digits and '_', at most one '.', and one 'e'/'E' exponent marker with an
# optional sign; they are floats when a '.' or exponent was seen.
def scan_number_token
  s = @src
  n = @len
  start = @pos
  i = start
  first = s.getbyte(i)
  i += 1 if first == PLUS || first == MINUS

  prefixed = false
  if i + 1 < n && s.getbyte(i) == DIGIT0
    marker = s.getbyte(i + 1)
    prefixed = marker == LOWER_X || marker == LOWER_O || marker == LOWER_B
  end

  saw_dot = false
  saw_exp = false

  if prefixed
    i += 2
    while i < n
      b = s.getbyte(i)
      # HEX_BYTE already covers 0-9, so exponent digits after 'p' are
      # consumed by this branch too; no separate digit branch is needed.
      if b == UNDERSCORE || HEX_BYTE[b]
        i += 1
      elsif b == DOT && !saw_dot && !saw_exp
        saw_dot = true
        i += 1
      elsif b == LOWER_P && !saw_exp
        saw_exp = true
        i += 1
        sign = s.getbyte(i)
        i += 1 if sign == PLUS || sign == MINUS
      else
        break
      end
    end
    return [i - start, saw_dot || saw_exp]
  end

  while i < n
    b = s.getbyte(i)
    if DIGIT_BYTE[b] || b == UNDERSCORE
      i += 1
    elsif b == DOT && !saw_dot && !saw_exp
      saw_dot = true
      i += 1
    elsif (b == LOWER_E || b == UPPER_E) && !saw_exp
      saw_exp = true
      i += 1
      sign = s.getbyte(i)
      i += 1 if sign == PLUS || sign == MINUS
    else
      break
    end
  end
  [i - start, saw_dot || saw_exp]
end
|
|
2063
|
+
|
|
2064
|
+
# ---------- Numeric helpers ----------
|
|
2065
|
+
|
|
2066
|
+
# Reports whether underscores in the digit string `s` are well placed:
# none at either end and never two in a row. An empty string is valid.
def valid_underscores?(s)
  !(s.start_with?("_") || s.end_with?("_") || s.include?("__"))
end
|
|
2084
|
+
|
|
2085
|
+
# Inclusive bounds of a signed 64-bit integer; parse_integer_lit rejects
# literals outside this range.
INT64_MIN = -(1 << 63)
INT64_MAX = (1 << 63) - 1
|
|
2087
|
+
|
|
2088
|
+
# Converts the integer lexeme `s` to an Integer.
#
# Accepted spellings: optional sign, then decimal digits or a lowercase
# 0x / 0o / 0b prefix followed by digits of that radix; '_' may separate
# digits. Decimal literals may not have leading zeros and the result must
# fit in a signed 64-bit integer.
#
# Raises RuntimeError with a descriptive message on any violation.
def parse_integer_lit(s)
  # Quick route: optional '-' followed by 1..18 plain decimal digits. Such
  # a literal cannot overflow i64, so no range check is needed.
  total = s.bytesize
  offset = s.getbyte(0) == MINUS ? 1 : 0
  digits = total - offset
  if digits >= 1 && digits <= 18
    idx = offset
    while idx < total
      byte = s.getbyte(idx)
      break unless byte >= DIGIT0 && byte <= DIGIT9

      idx += 1
    end
    if idx == total
      if digits > 1 && s.getbyte(offset) == DIGIT0
        raise "leading zeros are not allowed on decimal integers"
      end
      return s.to_i
    end
  end

  # General route: signs, radix prefixes, underscores and range checking.
  sign = 1
  rest = s
  if s.start_with?("-")
    sign = -1
    rest = s[1..]
  elsif s.start_with?("+")
    rest = s[1..]
  end

  raise "hex prefix must be lowercase '0x'" if rest.start_with?("0X")

  radix = { "0x" => 16, "0o" => 8, "0b" => 2 }[rest[0, 2]] || 10
  body = radix == 10 ? rest : rest[2..]

  raise "empty number" if body.empty?
  raise "underscore must be between digits" if body.start_with?("_") || body.end_with?("_")
  if radix == 10 && rest.length > 1 && rest.start_with?("0")
    raise "leading zeros are not allowed on decimal integers"
  end

  alphabet = "0123456789abcdef"[0, radix]
  clean = +""
  after_digit = false
  body.each_char do |ch|
    if ch == "_"
      raise "underscore must be between digits" unless after_digit

      after_digit = false
      next
    end
    raise "invalid digit '#{ch}' for base #{radix}" unless alphabet.include?(ch.downcase)

    clean << ch
    after_digit = true
  end
  raise "underscore must be between digits" unless after_digit

  value = sign * clean.to_i(radix)
  raise "integer out of i64 range" if value < INT64_MIN || value > INT64_MAX

  value
end
|
|
2163
|
+
|
|
2164
|
+
# Converts the float lexeme `s` to a Float.
#
# An optional leading sign is stripped, the magnitude is parsed by
# parse_nondec_float (0x / 0o / 0b prefix) or parse_dec_float, and the sign
# is applied by multiplication. Errors from those helpers propagate.
def parse_float_lit(s)
  negative = s.start_with?("-")
  magnitude_text = negative || s.start_with?("+") ? s[1..] : s
  magnitude =
    if magnitude_text.start_with?("0x", "0o", "0b")
      parse_nondec_float(magnitude_text)
    else
      parse_dec_float(magnitude_text)
    end
  (negative ? -1.0 : 1.0) * magnitude
end
|
|
2180
|
+
|
|
2181
|
+
# Converts an unsigned decimal float lexeme `s` to a Float.
#
# The mantissa must contain a '.' with at least one digit on each side and
# may use '_' between digits. An optional exponent follows the first 'e' or
# 'E'; it may carry a sign, must contain digits, and may not contain '_'.
#
# Raises RuntimeError with a descriptive message when a rule is violated;
# Kernel#Float may raise ArgumentError for anything that slips through.
def parse_dec_float(s)
  split_at = [s.index("e"), s.index("E")].compact.min
  mantissa = split_at ? s[0, split_at] : s
  exponent = split_at ? s[(split_at + 1)..] : nil

  raise "decimal float requires '.'" unless mantissa.include?(".")

  whole, frac = mantissa.split(".", 2)
  if whole.empty? || frac.nil? || frac.empty?
    raise "decimal float requires digit on both sides of '.'"
  end

  mantissa_char = ->(c) { c == "_" || (c >= "0" && c <= "9") }
  [whole, frac].each do |part|
    raise "invalid character in mantissa" unless part.each_char.all?(&mantissa_char)
  end
  raise "bad underscore in mantissa" unless valid_underscores?(whole) && valid_underscores?(frac)

  text = "#{whole.delete('_')}.#{frac.delete('_')}"
  unless exponent.nil?
    unsigned = exponent.start_with?("+", "-") ? exponent[1..] : exponent
    raise "underscore not allowed in exponent" if unsigned.include?("_")

    exponent_char = ->(c) { c == "+" || c == "-" || (c >= "0" && c <= "9") }
    raise "invalid character in exponent" unless exponent.each_char.all?(&exponent_char)
    raise "empty exponent" if unsigned.empty?

    text = "#{text}e#{exponent}"
  end
  Float(text)
end
|
|
2207
|
+
|
|
2208
|
+
# Converts an unsigned non-decimal float lexeme `s` to a Float.
#
# The lexeme is a 0x / 0o / 0b prefix, a mantissa in that radix (optionally
# with a '.' that needs digits on both sides, and '_' between digits), and
# a mandatory 'p' exponent: a decimal power of two with an optional sign.
#
# Raises RuntimeError with a descriptive message when a rule is violated;
# Kernel#Integer raises ArgumentError for a malformed exponent such as "+-1".
def parse_nondec_float(s)
  if s.start_with?("0x")
    radix = 16
  elsif s.start_with?("0o")
    radix = 8
  elsif s.start_with?("0b")
    radix = 2
  else
    raise "non-decimal float prefix required"
  end
  rest = s[2..]

  p_idx = rest.index("p")
  raise "non-decimal float requires 'p' exponent" if p_idx.nil?

  mant = rest[0, p_idx]
  exp_str = rest[(p_idx + 1)..]
  raise "empty exponent" if exp_str.nil? || exp_str.empty?
  raise "underscore not allowed in exponent" if exp_str.include?("_")
  unless exp_str.each_char.all? { |c| c == "+" || c == "-" || (c >= "0" && c <= "9") }
    raise "invalid exponent character"
  end

  # Force base 10: without an explicit base Kernel#Integer treats a leading
  # "0" as an octal prefix, so "010" would become 8 and "08" would raise.
  exp = Integer(exp_str, 10)

  if mant.include?(".")
    ip, fp = mant.split(".", 2)
    raise "digit required on both sides of '.'" if ip.empty? || fp.nil? || fp.empty?
  else
    ip = mant
    fp = ""
  end
  raise "bad underscore in mantissa" unless valid_underscores?(ip) && valid_underscores?(fp)

  ip_clean = ip.delete("_")
  fp_clean = fp.delete("_")
  digit_chars = "0123456789abcdef"[0, radix]
  [ip_clean, fp_clean].each do |part|
    unless part.each_char.all? { |c| digit_chars.include?(c.downcase) }
      raise "invalid digit for base #{radix}"
    end
  end

  int_val = ip_clean.empty? ? 0 : ip_clean.to_i(radix)
  # Accumulate the fractional digits as d / radix, d / radix^2, ...
  frac_val = 0.0
  div = radix.to_f
  fp_clean.each_char do |c|
    frac_val += c.to_i(radix) / div
    div *= radix
  end
  (int_val + frac_val) * (2.0**exp)
end
|
|
2248
|
+
|
|
2249
|
+
# Returns the number of days in month `m` (1..12) of year `y`, using the
# Gregorian leap-year rule for February. Any other month value yields 0.
def days_in_month(y, m)
  if m == 2
    leap = y % 400 == 0 || (y % 4 == 0 && y % 100 != 0)
    leap ? 29 : 28
  elsif [4, 6, 9, 11].include?(m)
    30
  elsif [1, 3, 5, 7, 8, 10, 12].include?(m)
    31
  else
    0
  end
end
|
|
2259
|
+
|
|
2260
|
+
# Validates that `s` is a calendar date written as YYYY-MM-DD.
#
# Raises RuntimeError when the layout is wrong, a digit position holds a
# non-digit, the month is outside 1..12, or the day does not exist in that
# month (per days_in_month). Returns nil when the date is valid.
def validate_date(s)
  raise "invalid date format" unless s.length == 10 && s[4] == "-" && s[7] == "-"

  digits_ok = [0, 1, 2, 3, 5, 6, 8, 9].all? { |idx| s[idx] >= "0" && s[idx] <= "9" }
  raise "date must be all digits" unless digits_ok

  year = s[0, 4].to_i
  month = s[5, 2].to_i
  day = s[8, 2].to_i
  raise "month out of range" if month < 1 || month > 12
  raise "day out of range" if day < 1 || day > days_in_month(year, month)
end
|
|
2270
|
+
|
|
2271
|
+
# Validates that `s` is a time of day written as HH:MM:SS.
#
# Raises RuntimeError when the layout is wrong, a digit position holds a
# non-digit, or a field is out of range (hour > 23, minute > 59,
# second > 59). Returns nil when the time is valid.
def validate_time(s)
  raise "invalid time format" unless s.length == 8 && s[2] == ":" && s[5] == ":"

  digits_ok = [0, 1, 3, 4, 6, 7].all? { |idx| s[idx] >= "0" && s[idx] <= "9" }
  raise "time must be all digits" unless digits_ok

  hour = s[0, 2].to_i
  minute = s[3, 2].to_i
  second = s[6, 2].to_i
  raise "hour out of range" if hour > 23
  raise "minute out of range" if minute > 59
  raise "second out of range (leap seconds not supported)" if second > 59
end
|
|
2282
|
+
|
|
2283
|
+
# Parses a date-led value at @pos: a LocalDate (YYYY-MM-DD), a
# LocalDateTime (date 'T' HH:MM:SS[.fraction]) or an OffsetDateTime (a
# datetime followed by 'Z'/'z' or a ±HH:MM offset).
#
# The scan works on bytes of @src directly, so the cost depends on the
# length of the value rather than on how much of the document follows it.
# Every character of a well-formed value is ASCII, so byte offsets and
# character offsets coincide.
#
# @pos is advanced only on success. Raises the error built by `err` for an
# invalid date or time, a lowercase 't' or a space used as the date/time
# separator, a malformed fraction (empty or more than 9 digits), a
# malformed or out-of-range offset, or an unexpected character directly
# after a date or an offset-less datetime.
def parse_datetime_value
  src = @src
  start = @pos
  date = src.byteslice(start, 10).force_encoding(Encoding::UTF_8)
  begin
    validate_date(date)
  rescue StandardError => e
    raise err(e.message)
  end

  cur = start + 10
  sep = src.getbyte(cur)

  if sep == SP
    # A bare date followed by whitespace is fine, unless the whitespace is
    # followed by a digit, which suggests "date time" written with a space.
    probe = cur
    ws = sep
    while ws == SP || ws == TAB
      probe += 1
      ws = src.getbyte(probe)
    end
    if ws && ws >= DIGIT0 && ws <= DIGIT9
      raise err("date and time must be separated by 'T' (space not permitted)")
    end
    @pos = cur
    return LocalDate.new(date)
  end

  if sep != 0x54 # 'T'
    if sep == LOWER_T
      raise err("date and time separator must be uppercase 'T' (lowercase 't' not permitted)")
    end
    raise err("invalid character after date") unless datetime_terminator_byte?(sep)
    @pos = cur
    return LocalDate.new(date)
  end

  time_at = cur + 1
  raise err("expected HH:MM:SS after 'T'") unless looks_like_time_prefix_at?(time_at)
  time_str = src.byteslice(time_at, 8).force_encoding(Encoding::UTF_8)
  begin
    validate_time(time_str)
  rescue StandardError => e
    raise err(e.message)
  end
  cur = time_at + 8

  # Optional fractional seconds: '.' followed by 1..9 digits.
  if src.getbyte(cur) == DOT
    frac_end = cur + 1
    while (fb = src.getbyte(frac_end)) && fb >= DIGIT0 && fb <= DIGIT9
      frac_end += 1
    end
    digits = frac_end - cur - 1
    raise err("expected fractional digits after '.'") if digits == 0
    raise err("fractional seconds limited to 9 digits (nanosecond precision)") if digits > 9
    cur = frac_end
  end

  nxt = src.getbyte(cur)

  if nxt == 0x5A || nxt == 0x7A # 'Z' / 'z'
    cur += 1
    @pos = cur
    return OffsetDateTime.new(src.byteslice(start, cur - start).force_encoding(Encoding::UTF_8))
  end

  if nxt == PLUS || nxt == MINUS
    shaped =
      cur + 6 <= @len &&
      DIGIT_BYTE[src.getbyte(cur + 1)] && DIGIT_BYTE[src.getbyte(cur + 2)] &&
      src.getbyte(cur + 3) == 0x3A && # ':'
      DIGIT_BYTE[src.getbyte(cur + 4)] && DIGIT_BYTE[src.getbyte(cur + 5)]
    raise err("invalid offset; expected ±HH:MM") unless shaped

    offset_hours = src.byteslice(cur + 1, 2).to_i
    offset_minutes = src.byteslice(cur + 4, 2).to_i
    raise err("offset out of range") if offset_hours > 23 || offset_minutes > 59

    cur += 6
    @pos = cur
    return OffsetDateTime.new(src.byteslice(start, cur - start).force_encoding(Encoding::UTF_8))
  end

  raise err("invalid character after datetime") unless datetime_terminator_byte?(nxt)
  @pos = cur
  LocalDateTime.new(src.byteslice(start, cur - start).force_encoding(Encoding::UTF_8))
end

# True when `byte` may directly follow a date or an offset-less datetime:
# end of input (nil), space, tab, LF, CR, '#', '/', ',', ']' or '}'.
def datetime_terminator_byte?(byte)
  byte.nil? || byte == SP || byte == TAB || byte == LF || byte == CR ||
    byte == HASH || byte == SLASH ||
    byte == 0x2C || byte == 0x5D || byte == 0x7D # ',' ']' '}'
end
|
|
2370
|
+
|
|
2371
|
+
# Reports whether the String `s` begins with eight characters shaped like
# DD:DD:DD (digits with ':' at character positions 2 and 5). Only the shape
# is checked, not the field ranges.
def looks_like_time_str?(s)
  return false if s.length < 8

  (0...8).all? do |idx|
    ch = s[idx]
    idx == 2 || idx == 5 ? ch == ":" : (ch >= "0" && ch <= "9")
  end
end
|
|
2377
|
+
|
|
2378
|
+
# Parses a LocalTime (HH:MM:SS with an optional '.' and 1..9 fractional
# digits) starting at @pos.
#
# The scan works on bytes of @src directly, so the cost depends on the
# length of the value rather than on how much of the document follows it.
#
# @pos is advanced only on success. Raises the error built by `err` for an
# out-of-range time, a malformed fraction, or an unexpected character
# directly after the time.
def parse_local_time_value
  src = @src
  start = @pos
  time_str = src.byteslice(start, 8).force_encoding(Encoding::UTF_8)
  begin
    validate_time(time_str)
  rescue StandardError => e
    raise err(e.message)
  end
  cur = start + 8

  if src.getbyte(cur) == DOT
    frac_end = cur + 1
    while (fb = src.getbyte(frac_end)) && fb >= DIGIT0 && fb <= DIGIT9
      frac_end += 1
    end
    digits = frac_end - cur - 1
    raise err("expected fractional digits after '.'") if digits == 0
    raise err("fractional seconds limited to 9 digits") if digits > 9
    cur = frac_end
  end

  # The time must be followed by end of input, whitespace, a line break,
  # the start of a comment ('#' or '/'), or a flow delimiter.
  nxt = src.getbyte(cur)
  terminated =
    nxt.nil? || nxt == SP || nxt == TAB || nxt == LF || nxt == CR ||
    nxt == HASH || nxt == SLASH ||
    nxt == 0x2C || nxt == 0x5D || nxt == 0x7D # ',' ']' '}'
  raise err("invalid character after time") unless terminated

  @pos = cur
  LocalTime.new(src.byteslice(start, cur - start).force_encoding(Encoding::UTF_8))
end
|
|
2408
|
+
|
|
2409
|
+
# ---------- Strings ----------
|
|
2410
|
+
|
|
2411
|
+
# Parses a single-line basic string ("...") whose opening quote is at @pos
# and returns its decoded contents as a UTF-8 String.
#
# Runs of ordinary bytes are copied in one slice; backslash escapes
# (\" \\ \n \t \r \b \f \uXXXX \UXXXXXXXX) are decoded as they are met.
# A result containing non-ASCII characters is NFC-normalized before being
# returned. @pos ends up just past the closing quote.
#
# Raises the error built by `err` / `err_at` for an unterminated string, a
# line break inside the string, an unterminated escape, or an unknown
# escape. The unknown-escape message shows the offending character in full,
# including when it is a multi-byte UTF-8 character.
def parse_basic_string_value
  sl = @line
  sls = @line_start
  sp = @pos
  @pos += 1 # opening "
  out = +""
  out.force_encoding(Encoding::UTF_8)
  s = @src
  run_start = @pos
  loop do
    b = s.getbyte(@pos)
    if b.nil?
      # Report at the opening quote rather than at end of input.
      raise err_at(sl, sls, sp, "unterminated string")
    elsif b == LF || b == CR
      raise err("strings cannot span lines")
    elsif b == DQUOTE
      out << s.byteslice(run_start, @pos - run_start).force_encoding(Encoding::UTF_8) if @pos > run_start
      @pos += 1
      return out.ascii_only? ? out : out.unicode_normalize(:nfc)
    elsif b == BACKSLASH
      # Flush the pending literal run, then decode the escape.
      out << s.byteslice(run_start, @pos - run_start).force_encoding(Encoding::UTF_8) if @pos > run_start
      @pos += 1
      esc = s.getbyte(@pos)
      @pos += 1 unless esc.nil?
      case esc
      when DQUOTE then out << '"'
      when BACKSLASH then out << "\\"
      when LOWER_N then out << "\n"
      when LOWER_T then out << "\t"
      when LOWER_R then out << "\r"
      when 0x62 then out << "\b" # 'b'
      when LOWER_F_LETTER then out << "\f"
      when LOWER_U then out << read_hex_codepoint(4)
      when UPPER_U then out << read_hex_codepoint(8)
      when nil then raise err("unterminated escape")
      else
        # @pos is already one past the escape byte. Decode the whole
        # character (at most 4 bytes in UTF-8) so the message is readable
        # for non-ASCII input instead of showing a lone byte.
        shown = s.byteslice(@pos - 1, 4).force_encoding(Encoding::UTF_8)[0]
        raise err("invalid escape '\\#{shown}'")
      end
      run_start = @pos
    else
      @pos += 1
    end
  end
end
|
|
2454
|
+
|
|
2455
|
+
# Parses a single-line literal string ('...') whose opening quote is at
# @pos and returns the bytes between the quotes, verbatim, as a UTF-8
# String. No escape processing is performed. @pos ends up just past the
# closing quote.
#
# Raises the error built by `err` when a line break is met inside the
# string, and the one built by `err_at` (positioned at the opening quote)
# when the input ends before the closing quote.
def parse_literal_string_value
  open_line = @line
  open_line_start = @line_start
  open_pos = @pos
  src = @src
  limit = @len
  body_start = open_pos + 1
  @pos = body_start
  until @pos >= limit
    case src.getbyte(@pos)
    when SQUOTE
      text = src.byteslice(body_start, @pos - body_start).force_encoding(Encoding::UTF_8)
      @pos += 1
      return text
    when LF, CR
      raise err("strings cannot span lines")
    end
    @pos += 1
  end
  raise err_at(open_line, open_line_start, open_pos, "unterminated string")
end
|
|
2475
|
+
|
|
2476
|
+
# Reads exactly `n` hex digits at @pos (the payload of a \u or \U escape)
# and returns the character they encode as a UTF-8 String. @pos is moved
# past the digits once they have been validated as hex.
#
# Raises the error built by `err` when fewer than `n` bytes remain, a byte
# is not a hex digit, the value is zero, the value is a surrogate
# (U+D800..U+DFFF), or the value cannot be encoded as a UTF-8 character.
def read_hex_codepoint(n)
  src = @src
  raise err("expected #{n} hex digits in unicode escape") if @pos + n > @len

  digits = src.byteslice(@pos, n).force_encoding(Encoding::UTF_8)
  n.times do |k|
    next if HEX_BYTE[src.getbyte(@pos + k)]

    raise err("invalid hex in unicode escape: #{digits}")
  end

  codepoint = digits.to_i(16)
  @pos += n

  # SPEC: U+0000 is forbidden anywhere in DMS source, including via escape
  # decoding, so neither `\u0000` nor `\U00000000` may slip through.
  raise err("\\u0000 escape forbidden") if codepoint == 0
  if codepoint.between?(0xD800, 0xDFFF)
    raise err(format("surrogate codepoint U+%04X in escape", codepoint))
  end

  begin
    codepoint.chr(Encoding::UTF_8)
  rescue RangeError
    raise err("unicode escape is not a scalar value")
  end
end
|
|
2504
|
+
|
|
2505
|
+
# ---------- Heredocs ----------
|
|
2506
|
+
|
|
2507
|
+
# Value object with two fields, `lines` and `strip_depth`, defined in the
# heredoc section of the parser.
# NOTE(review): presumably this is what collect_heredoc_body returns, with
# `lines` holding [text, line_no, line_start] triples (the shape that
# validate_heredoc_basic_surrogates iterates over) and `strip_depth` the
# indentation to remove from the body - confirm against collect_heredoc_body
# and strip_indent_and_continuations.
HBody = Struct.new(:lines, :strip_depth)
|
|
2508
|
+
|
|
2509
|
+
# Parses a basic heredoc whose `"""` opener starts at @pos and returns the
# resulting String.
#
# Steps: skip the opener, read the optional label and modifiers, require the
# rest of the opener line to be empty, collect the body up to the terminator
# (the label, or `"""` when there is no label), reject surrogate \u / \U
# escapes, strip indentation and continuations, apply the modifiers, and
# record the original string form. A result containing non-ASCII characters
# is NFC-normalized.
#
# Raises the error built by `err` when the opener line has trailing content
# or a modifier fails; helper errors propagate.
def parse_heredoc_basic
  @pos += 3
  tag = parse_heredoc_label
  mods = parse_heredoc_modifiers
  skip_inline_ws
  unless consume_eol || eof?
    raise err("heredoc opener must be followed by end of line")
  end

  raw = collect_heredoc_body(tag.empty? ? '"""' : tag)
  # Basic heredoc bodies process the same escapes as basic strings, so
  # surrogate codepoints (U+D800..U+DFFF) written as escapes are rejected
  # here, before any decoding happens.
  validate_heredoc_basic_surrogates(raw)
  dedented = strip_indent_and_continuations(raw, true)
  text =
    begin
      apply_modifiers(dedented, mods)
    rescue StandardError => e
      raise err(e.message)
    end

  calls = mods.map { |mod| HeredocModifierCall.new(mod[:name], mod[:args]) }
  form = StringForm.heredoc(:basic_triple, tag.empty? ? nil : tag, calls)
  record_form(OriginalLiteral.string(form))

  return text if text.ascii_only?

  text.unicode_normalize(:nfc)
end
|
|
2534
|
+
|
|
2535
|
+
# SPEC §basic-string escapes: a `\uXXXX` / `\UXXXXXXXX` escape whose decoded
# value falls in the surrogate range U+D800..U+DFFF is a parse error. The
# basic-string lexer enforces this inline; heredoc bodies are collected
# raw, so the body is scanned for surrogate escapes here.
#
# `body.lines` yields [text, line_no, line_start] entries. In each text a
# run of backslashes only introduces an escape when its length is odd (an
# even run is made of escaped backslashes). Escapes that are too short or
# contain non-hex digits are ignored by this check.
#
# Raises DecodeError(line_no, column, message) for the first surrogate
# escape found; the column passed is the byte index, within the line text,
# of the `u` / `U` that follows the backslash run.
def validate_heredoc_basic_surrogates(body)
  body.lines.each do |text, line_no, _line_start|
    raw = text.b
    size = raw.bytesize
    cursor = 0
    while cursor < size
      unless raw.getbyte(cursor) == BACKSLASH
        cursor += 1
        next
      end

      # Measure the backslash run and resume scanning right after it.
      run_end = cursor
      run_end += 1 while run_end < size && raw.getbyte(run_end) == BACKSLASH
      backslashes = run_end - cursor
      cursor = run_end
      next unless backslashes.odd? && run_end < size

      width =
        case raw.getbyte(run_end)
        when LOWER_U then 4
        when UPPER_U then 8
        else 0
        end
      next unless width > 0 && run_end + 1 + width <= size

      digits = raw.byteslice(run_end + 1, width)
      next unless digits.each_byte.all? { |hb| HEX_BYTE[hb] }

      codepoint = digits.to_i(16)
      next unless codepoint >= 0xD800 && codepoint <= 0xDFFF

      raise DecodeError.new(line_no, run_end,
                            format("surrogate codepoint U+%04X in escape", codepoint))
    end
  end
end
|
|
2586
|
+
|
|
2587
|
+
# Parses a literal heredoc whose `'''` opener starts at @pos and returns the
# resulting String.
#
# Steps: skip the opener, read the optional label and modifiers, require the
# rest of the opener line to be empty, collect the body up to the terminator
# (the label, or `'''` when there is no label), strip indentation (passing
# `false` as the second argument of strip_indent_and_continuations, where
# the basic heredoc passes `true`), apply the modifiers, and record the
# original string form.
#
# Raises the error built by `err` when the opener line has trailing content
# or a modifier fails; helper errors propagate.
def parse_heredoc_literal
  @pos += 3
  tag = parse_heredoc_label
  mods = parse_heredoc_modifiers
  skip_inline_ws
  unless consume_eol || eof?
    raise err("heredoc opener must be followed by end of line")
  end

  raw = collect_heredoc_body(tag.empty? ? "'''" : tag)
  dedented = strip_indent_and_continuations(raw, false)
  text =
    begin
      apply_modifiers(dedented, mods)
    rescue StandardError => e
      raise err(e.message)
    end

  calls = mods.map { |mod| HeredocModifierCall.new(mod[:name], mod[:args]) }
  form = StringForm.heredoc(:literal_triple, tag.empty? ? nil : tag, calls)
  record_form(OriginalLiteral.string(form))
  text
end
|
|
2607
|
+
|
|
2608
|
+
# Reads an optional heredoc label at the current position.
# Returns "" (without moving @pos) when the current byte cannot start a label
# or the input is exhausted; otherwise consumes the label bytes and returns
# them as a UTF-8 string.
def parse_heredoc_label
  lead = @src.getbyte(@pos)
  return "" unless lead && LABEL_START_BYTE[lead]

  origin = @pos
  loop do
    byte = @src.getbyte(@pos)
    break unless byte && LABEL_CONT_BYTE[byte]

    @pos += 1
  end
  @src.byteslice(origin, @pos - origin).force_encoding(Encoding::UTF_8)
end
|
|
2617
|
+
|
|
2618
|
+
# Parses zero or more modifier calls following a heredoc opener/label.
# Each modifier must be separated from what precedes it by inline whitespace.
# When the next token is not a modifier, any whitespace skipped while looking
# for one is given back (@pos is rewound) and the collected list is returned.
def parse_heredoc_modifiers
  collected = []
  loop do
    mark = @pos
    skip_inline_ws
    lead = @src.getbyte(@pos)
    unless lead && LABEL_START_BYTE[lead]
      @pos = mark
      return collected
    end
    raise err("modifier must be preceded by whitespace") unless @pos > mark

    collected << parse_one_modifier
  end
end
|
|
2634
|
+
|
|
2635
|
+
# Parses a single `name(args...)` modifier call and returns
# { name: String, args: Array }. Original-form recording is switched off
# while the arguments are parsed and restored afterwards, even on error.
def parse_one_modifier
  name_start = @pos
  @pos += 1 while (byte = @src.getbyte(@pos)) && LABEL_CONT_BYTE[byte]
  name = @src.byteslice(name_start, @pos - name_start).force_encoding(Encoding::UTF_8)
  raise err("modifiers require parentheses") unless @src.getbyte(@pos) == 0x28 # '('

  @pos += 1
  previous = @record_forms
  @record_forms = false
  args =
    begin
      parse_modifier_call_args
    ensure
      @record_forms = previous
    end
  { name: name, args: args }
end
|
|
2652
|
+
|
|
2653
|
+
# Parses the comma-separated argument list of a modifier call; the opening
# '(' has already been consumed by the caller. Consumes the closing ')' and
# returns the parsed values. A ')' directly after a ',' is accepted, so a
# trailing comma is allowed.
def parse_modifier_call_args
  collected = []
  loop do
    skip_inline_ws
    lead = @src.getbyte(@pos)
    raise err("expected ',' or ')' in modifier args") if lead.nil?

    if lead == 0x29 # ')'
      @pos += 1
      return collected
    end

    collected << parse_inline_value_or_heredoc
    skip_inline_ws
    case @src.getbyte(@pos)
    when COMMA
      @pos += 1
    when 0x29
      @pos += 1
      return collected
    else
      raise err("expected ',' or ')' in modifier args")
    end
  end
end
|
|
2677
|
+
|
|
2678
|
+
# Collects the raw lines of a heredoc body, starting at @pos, up to the line
# whose whitespace-stripped text equals `terminator`.
#
# Returns HBody.new(lines, strip_depth) where each line is
# [raw_text, line_no, line_start] and strip_depth is the number of leading
# spaces on the terminator line. On return @pos sits at the end of the
# terminator line's text; its line ending is not consumed here.
#
# Raises "unterminated heredoc" (positioned where the body began) when the
# input ends before a terminator line is found.
def collect_heredoc_body(terminator)
  lines = []
  start_line = @line
  start_line_start = @line_start
  start_pos = @pos
  src = @src
  limit = @len
  loop do
    raise err_at(start_line, start_line_start, start_pos, "unterminated heredoc") if @pos >= limit

    # Scan to the end of the current physical line (LF, CR, or end of input).
    line_begin = @pos
    while @pos < limit
      byte = src.getbyte(@pos)
      break if byte == LF || byte == CR

      @pos += 1
    end
    raw = src.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    this_line = @line
    this_line_start = @line_start

    if raw.strip == terminator
      strip_depth = raw.each_char.take_while { |ch| ch == " " }.length
      return HBody.new(lines, strip_depth)
    end

    eol_at = @pos
    consume_eol
    # Progress guard: the scan above stops on any CR, but a CR that is not
    # part of a line ending may not be consumed by consume_eol (presumably it
    # only accepts LF / CRLF - verify). Without progress this loop would spin
    # forever appending empty lines, so fail loudly instead.
    raise err("bare carriage return in heredoc body") if @pos == eol_at && @pos < limit

    lines << [raw, this_line, this_line_start]
  end
end
|
|
2707
|
+
|
|
2708
|
+
# ---------- Heredoc body processing ----------
|
|
2709
|
+
|
|
2710
|
+
# Turns collected heredoc lines into the final body text.
#
# body       - responds to #lines ([text, line_no, line_start] triples) and
#              #strip_depth (number of leading spaces to remove per line).
# allow_cont - when true, a line ending in an unescaped backslash (an odd run
#              of trailing backslashes, ignoring trailing spaces/tabs) is
#              spliced onto the next line, whose leading spaces/tabs are dropped.
#
# Lines made only of spaces/tabs become empty. Lines are joined with "\n".
# Raises DecodeError when a non-blank line is indented less than the strip
# depth, or when the last line ends with a continuation.
def strip_indent_and_continuations(body, allow_cont)
  result = String.new(encoding: Encoding::UTF_8)
  at_first = true
  joining = false
  last_line_no = 1
  body.lines.each do |text, line_no, _line_start|
    last_line_no = line_no
    blank = text.each_char.none? { |ch| ch != " " && ch != "\t" }
    if blank
      piece = ""
    else
      depth = body.strip_depth
      indent = text.each_char.take_while { |ch| ch == " " }.length
      if indent < depth
        raise DecodeError.new(line_no, indent + 1,
                              "heredoc body line indented #{indent} spaces, less than strip depth #{depth}")
      end
      piece = text[depth..]
    end

    spliced = false
    if allow_cont
      tail = piece.sub(/[ \t]+\z/, "")
      # An odd number of trailing backslashes leaves one unescaped backslash,
      # which marks a continuation; an even number is escaped backslashes only.
      if tail[/\\+\z/].to_s.length.odd?
        piece = tail[0, tail.length - 1]
        spliced = true
      end
    end

    if at_first
      result << piece
      at_first = false
    elsif joining
      # Continuing the previous line: no newline, and leading blanks vanish.
      result << piece.sub(/\A[ \t]+/, "") unless blank
    else
      result << "\n" << piece
    end
    joining = spliced
  end
  raise DecodeError.new(last_line_no, 1, "trailing line continuation has nothing to splice to") if joining

  result
end
|
|
2768
|
+
|
|
2769
|
+
# Folds each paragraph of `s` onto a single line.
# Paragraphs are separated by a blank line ("\n\n"); within a paragraph the
# non-empty lines are joined with a single space. The folded paragraphs are
# then joined with one "\n".
def fold_paragraphs(s)
  folded = s.split("\n\n", -1).map do |paragraph|
    kept = paragraph.split("\n").reject { |line| line.empty? }
    kept.join(" ")
  end
  folded.join("\n")
end
|
|
2773
|
+
|
|
2774
|
+
# Replaces every maximal run of characters from `char_set` in `s` with a
# single copy of `replacement`; all other characters are copied unchanged.
#
# s           - String to scan.
# char_set    - collection of one-character strings (anything responding to
#               #include?).
# replacement - String emitted once per run.
#
# Returns a new String. The scan is a single pass over the characters, so it
# stays linear on multi-byte UTF-8 text (integer indexing with s[i] has to
# walk the string from the start for non-ASCII content).
def replace_all_runs(s, char_set, replacement)
  out = +""
  in_run = false
  s.each_char do |ch|
    if char_set.include?(ch)
      # Emit the replacement once, when the run starts.
      out << replacement unless in_run
      in_run = true
    else
      out << ch
      in_run = false
    end
  end
  out
end
|
|
2791
|
+
|
|
2792
|
+
# Replaces the run of `char_set` characters at the start of `s` with
# `replacement`. Returns `s` itself when it does not start with such a run.
def replace_leading_run(s, char_set, replacement)
  run = s.each_char.take_while { |ch| char_set.include?(ch) }.length
  return s if run.zero?

  replacement + s[run..]
end
|
|
2800
|
+
|
|
2801
|
+
# Replaces the run of `char_set` characters at the end of `s` with
# `replacement`. Returns `s` itself when it does not end with such a run.
def replace_trailing_run(s, char_set, replacement)
  run = s.each_char.reverse_each.take_while { |ch| char_set.include?(ch) }.length
  return s if run.zero?

  s[0, s.length - run] + replacement
end
|
|
2809
|
+
|
|
2810
|
+
# Applies replace_leading_run and then replace_trailing_run to every line of
# `s` (split on "\n", keeping trailing empty lines) and re-joins with "\n".
# The trailing pass runs on the output of the leading pass.
def per_line_edges(s, char_set, replacement)
  rewritten = s.split("\n", -1).map do |line|
    head_done = replace_leading_run(line, char_set, replacement)
    replace_trailing_run(head_done, char_set, replacement)
  end
  rewritten.join("\n")
end
|
|
2816
|
+
|
|
2817
|
+
# Implements the trim modifier.
#
# s           - text to transform.
# chars       - String whose characters form the set to trim; empty means no-op.
# where_s     - String of position flags: "*" every run, "|" both edges of
#               each line, "<" start of the text, ">" end of the text.
# replacement - String substituted for each trimmed run.
#
# "*" takes precedence and ignores the other flags. Otherwise "|", "<" and
# ">" are applied in that order. With no recognised flag, `s` is returned
# unchanged.
def apply_trim(s, chars, where_s, replacement)
  return s if chars.empty?

  char_set = chars.chars.uniq
  everywhere, per_line, at_start, at_end = ["*", "|", "<", ">"].map { |flag| where_s.include?(flag) }
  return s unless everywhere || per_line || at_start || at_end
  return replace_all_runs(s, char_set, replacement) if everywhere

  result = s
  result = per_line_edges(result, char_set, replacement) if per_line
  result = replace_leading_run(result, char_set, replacement) if at_start
  result = replace_trailing_run(result, char_set, replacement) if at_end
  result
end
|
|
2832
|
+
|
|
2833
|
+
# Applies heredoc modifiers to `s` in order and returns the final text.
#
# mods - Array of { name:, args: } hashes. Recognised names are
#        "_fold_paragraphs" (no arguments) and "_trim" (chars, where and an
#        optional replacement, all Strings).
#
# Raises RuntimeError with a descriptive message for an unknown modifier, a
# wrong argument count, or a non-String trim argument.
def apply_modifiers(s, mods)
  mods.reduce(s) do |text, mod|
    name = mod[:name]
    if name == "_fold_paragraphs"
      raise "fold_paragraphs() takes no arguments" unless mod[:args].empty?

      fold_paragraphs(text)
    elsif name == "_trim"
      args = mod[:args]
      raise "trim(chars, where, replacement = \"\") expects 2 or 3 arguments" unless args.length.between?(2, 3)

      chars, where, third = args
      raise "trim: first argument (chars) must be a string" unless chars.is_a?(String)
      raise "trim: second argument (where) must be a string" unless where.is_a?(String)

      replacement = ""
      if args.length == 3
        raise "trim: third argument (replacement) must be a string" unless third.is_a?(String)

        replacement = third
      end
      apply_trim(text, chars, where, replacement)
    else
      raise "unknown modifier: #{name}"
    end
  end
end
|
|
2859
|
+
|
|
2860
|
+
# ---------- Flow forms ----------
|
|
2861
|
+
|
|
2862
|
+
# Parses a flow array `[a, b, ...]` starting at the opening bracket and
# returns the items as an Array. A closing bracket directly after a comma is
# accepted (trailing comma). While each item is parsed its index is pushed on
# @path and popped again even if parsing raises.
#
# Raises via `err` when the input ends inside the array or when an item is
# followed by anything other than ',' or ']'.
def parse_flow_array
  @pos += 1 # [
  items = []
  loop do
    skip_flow_ws
    if @src.getbyte(@pos) == RBRACK
      @pos += 1
      return items
    end
    idx = items.length
    @path.push(idx)
    begin
      v = parse_inline_value_in_flow
    ensure
      @path.pop
    end
    items << v
    skip_flow_ws
    b = @src.getbyte(@pos)
    if b == COMMA
      @pos += 1
    elsif b == RBRACK
      @pos += 1
      return items
    elsif b.nil?
      raise err("unterminated flow array")
    else
      # Integer#chr on a byte >= 128 yields a one-byte binary string, which
      # would turn the whole message into ASCII-8BIT with a lone UTF-8 lead
      # byte. Show '?' instead, as consume_after_value does.
      shown = b < 128 ? b.chr : "?"
      raise err("unexpected '#{shown}' in flow array; expected ',' or ']'")
    end
  end
end
|
|
2893
|
+
|
|
2894
|
+
# Parses a flow table `{k: v, ...}` starting at the opening brace and returns
# the finalized table. Each key must be followed by ':' and then whitespace.
# A closing brace directly after a comma is accepted (trailing comma). While
# a value is parsed its key is pushed on @path and popped again even if
# parsing raises.
#
# Raises via `err` on a missing ':', missing whitespace after ':', a
# duplicate key, end of input inside the table, or an unexpected byte after a
# value.
def parse_flow_table
  @pos += 1 # {
  t = new_table
  loop do
    skip_flow_ws
    if @src.getbyte(@pos) == RBRACE
      @pos += 1
      return finalize_table(t)
    end
    key = parse_key
    raise err("expected ':' after flow-table key") unless @src.getbyte(@pos) == COLON

    @pos += 1
    b = @src.getbyte(@pos)
    unless b == SP || b == TAB || b == LF || b == CR
      raise err("expected whitespace after ':'")
    end
    skip_flow_ws
    @path.push(key)
    begin
      v = parse_inline_value_in_flow
    ensure
      @path.pop
    end
    raise err("duplicate key: #{key}") if t.key?(key)

    t[key] = v
    skip_flow_ws
    b = @src.getbyte(@pos)
    if b == COMMA
      @pos += 1
    elsif b == RBRACE
      @pos += 1
      return finalize_table(t)
    elsif b.nil?
      raise err("unterminated flow table")
    else
      # Integer#chr on a byte >= 128 yields a one-byte binary string, which
      # would turn the whole message into ASCII-8BIT with a lone UTF-8 lead
      # byte. Show '?' instead, as consume_after_value does.
      shown = b < 128 ? b.chr : "?"
      raise err("unexpected '#{shown}' in flow table; expected ',' or '}'")
    end
  end
end
|
|
2933
|
+
|
|
2934
|
+
# Skips whitespace inside a flow form: spaces, tabs, LF and CRLF (line
# endings also call advance_line). Stops at the first other byte, at a CR not
# followed by LF, or at end of input. Any comment opener (`#`, `//`, `/*`)
# raises, because comments are not allowed inside flow forms.
def skip_flow_ws
  loop do
    case @src.getbyte(@pos)
    when SP, TAB
      @pos += 1
    when LF
      @pos += 1
      advance_line
    when CR
      return unless @src.getbyte(@pos + 1) == LF

      @pos += 2
      advance_line
    when HASH
      raise err("comments not allowed inside flow forms")
    when SLASH
      following = @src.getbyte(@pos + 1)
      return unless following == SLASH || following == STAR

      raise err("comments not allowed inside flow forms")
    else
      return
    end
  end
end
|
|
2952
|
+
|
|
2953
|
+
# Parses one value inside a flow array/table. A triple-quote opener
# (`"""` or `'''`) is rejected up front because heredocs are not allowed in
# flow forms; everything else is delegated to parse_inline_value_or_heredoc.
def parse_inline_value_in_flow
  lead = @src.getbyte(@pos)
  heredoc_opener = (lead == DQUOTE && starts_bytes?('"""')) ||
                   (lead == SQUOTE && starts_bytes?("'''"))
  raise err("heredocs are not allowed inside flow forms") if heredoc_opener

  parse_inline_value_or_heredoc
end
|
|
2962
|
+
|
|
2963
|
+
# ---------- Post-value ----------
|
|
2964
|
+
|
|
2965
|
+
# Consumes whatever may legally follow a value on its line: inline
# whitespace, any number of `/* ... */` block comments, and at most one line
# comment (`#` or `//`, which must be preceded by whitespace; `###` is not
# treated as a line comment). Comments are attached as :trailing to the
# current path unless @lite is set. Finally consumes the line ending (LF or
# CRLF), or accepts end of input; anything else raises.
#
# NOTE(review): `allow_eof` is never read in this body - end of input is
# always accepted here.
def consume_after_value(allow_eof)
  attach = lambda do |raw, kind|
    @comments << AttachedComment.new(Comment.new(raw, kind), :trailing, @path.dup.freeze) unless @lite
  end

  loop do
    mark = @pos
    skip_inline_ws
    spaced = @pos > mark
    lead = @src.getbyte(@pos)
    following = @src.getbyte(@pos + 1)

    # Block comments need no preceding whitespace and may be followed by more.
    if lead == SLASH && following == STAR
      attach.call(read_c_block_comment, :block)
      next
    end

    line_comment =
      if lead == HASH
        !starts_bytes?("###")
      else
        lead == SLASH && following == SLASH
      end
    break unless line_comment

    unless spaced
      raise err(lead == HASH ? "expected whitespace before '#' comment" : "expected whitespace before '//' comment")
    end
    attach.call(read_line_comment_to_eol, :line)
    break
  end

  trailing = @src.getbyte(@pos)
  return if trailing.nil?

  if trailing == LF
    @pos += 1
  elsif trailing == CR && @src.getbyte(@pos + 1) == LF
    @pos += 2
  else
    raise err("unexpected character '#{trailing < 128 ? trailing.chr : '?'}' after value")
  end
  advance_line
  nil
end
|
|
2999
|
+
|
|
3000
|
+
# ---------- Original-form recording ----------
|
|
3001
|
+
|
|
3002
|
+
# Remembers the original literal form `lit` for the value at the current
# path by appending [frozen copy of @path, lit] to @original_forms.
# Does nothing in lite mode or while form recording is switched off.
def record_form(lit)
  return if @lite
  return unless @record_forms

  @original_forms << [@path.dup.freeze, lit]
end
|
|
3006
|
+
end
|
|
3007
|
+
end
|