foxtail-runtime 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Foxtail
4
+ class Bundle
5
+ # Lightweight runtime parser for FTL resources.
6
+ # Equivalent to fluent-bundle's FluentResource parser.
7
+ #
8
+ # This parser is optimized for runtime performance and produces
9
+ # Bundle::Parser::AST structures directly. Unlike Syntax::Parser, it:
10
+ # - Does not track source positions
11
+ # - Does not preserve comments
12
+ # - Uses error recovery to skip invalid entries
13
+ # - Produces `String | Array` patterns directly
14
+ #
15
+ # For validation and tooling, use Syntax::Parser instead.
16
+ class Parser
17
# Internal parse error used for control flow: raised to abandon the entry that is
# currently being parsed.
class ParseError < Error; end

# Regex patterns (equivalent to fluent-bundle/src/resource.ts)

# Start of a message or term: an identifier at the beginning of a line, followed by "=".
# Ruby's ^ already matches at every line start, so no flag is needed here (the /m flag
# means "dot matches newline" in Ruby, not "multiline anchors" as in JavaScript).
RE_MESSAGE_START = /^(?<!\r)(-?[a-zA-Z][\w-]*) *= */
private_constant :RE_MESSAGE_START

# fluent.js uses /(?<=\n *)\./ but Ruby does not support variable-length lookbehind.
# The skip_blank_and_check_attribute method handles newline/space skipping instead.
RE_ATTRIBUTE_START = /\.([a-zA-Z][\w-]*) *= */
private_constant :RE_ATTRIBUTE_START

# Opening bracket of a variant key, optionally marked as the default with "*".
RE_VARIANT_START = /\*?\[/
private_constant :RE_VARIANT_START

# Group 1 is the whole number, group 2 the fraction digits (if any).
RE_NUMBER_LITERAL = /(-?\d+(?:\.(\d+))?)/
private_constant :RE_NUMBER_LITERAL

RE_IDENTIFIER = /([a-zA-Z][\w-]*)/
private_constant :RE_IDENTIFIER

# Group 1 is the optional sigil ("$" or "-"), group 2 the name, group 3 the attribute.
RE_REFERENCE = /([$-])?([a-zA-Z][\w-]*)(?:\.([a-zA-Z][\w-]*))?/
private_constant :RE_REFERENCE

RE_FUNCTION_NAME = /^[A-Z][A-Z\d_-]*$/
private_constant :RE_FUNCTION_NAME

# Text up to the next brace or line break; a CR that is not part of CRLF counts as text.
RE_TEXT_RUN = /((?:[^{}\n\r]|\r(?!\n))+)/
private_constant :RE_TEXT_RUN

# String literal content up to the next backslash, quote or line break.
RE_STRING_RUN = /((?:[^\\"\n\r]|\r(?!\n))*)/
private_constant :RE_STRING_RUN

RE_STRING_ESCAPE = /\\([\\"])/
private_constant :RE_STRING_ESCAPE

# Group 1 holds the four digits of \uHHHH, group 2 the six digits of \UHHHHHH.
# The alternation is kept inside a group so that prefixing the source with an anchor
# (as the matching helpers do) anchors both forms, not just the first one.
RE_UNICODE_ESCAPE = /\\(?:u(\h{4})|U(\h{6}))/
private_constant :RE_UNICODE_ESCAPE

RE_LEADING_NEWLINES = /^\n+/
private_constant :RE_LEADING_NEWLINES

RE_TRAILING_SPACES = / +$/
private_constant :RE_TRAILING_SPACES

RE_BLANK_LINES = / *\r?\n/
private_constant :RE_BLANK_LINES

RE_INDENT = /( *)$/
private_constant :RE_INDENT

# Token patterns
# Only accept space, CRLF, and LF as syntactical whitespace (not CR alone or tabs)
TOKEN_BRACE_OPEN = /\{(?: |\r?\n)*/
private_constant :TOKEN_BRACE_OPEN

TOKEN_BRACE_CLOSE = /(?: |\r?\n)*\}/
private_constant :TOKEN_BRACE_CLOSE

TOKEN_BRACKET_OPEN = /\[(?: |\r?\n)*/
private_constant :TOKEN_BRACKET_OPEN

TOKEN_BRACKET_CLOSE = /(?: |\r?\n)*\] */
private_constant :TOKEN_BRACKET_CLOSE

TOKEN_PAREN_OPEN = /(?: |\r?\n)*\((?: |\r?\n)*/
private_constant :TOKEN_PAREN_OPEN

TOKEN_ARROW = /(?: |\r?\n)*->(?: |\r?\n)*/
private_constant :TOKEN_ARROW

TOKEN_COLON = /(?: |\r?\n)*:(?: |\r?\n)*/
private_constant :TOKEN_COLON

# The comma itself is optional, so this token also matches the empty string.
TOKEN_COMMA = /(?: |\r?\n)*,?(?: |\r?\n)*/
private_constant :TOKEN_COMMA

TOKEN_BLANK = /(?: |\r?\n)+/
private_constant :TOKEN_BLANK
96
+
97
# Parse FTL source into an array of messages and terms.
#
# Every match of RE_MESSAGE_START is treated as the beginning of an entry. An entry
# whose body raises ParseError is left out of the result and scanning continues.
#
# @param source [String] FTL source text
# @return [Array<AST::Message, AST::Term>] Parsed entries
def parse(source)
  @source = source
  @cursor = 0
  @body = []

  source.scan(RE_MESSAGE_START) do |(entry_id)|
    # Continue right after the "id = " prefix that scan has just matched.
    @cursor = Regexp.last_match.end(0)
    @body << parse_message(entry_id)
  rescue ParseError
    # Skip to next entry on error
    next
  end

  @body
end
120
+
121
# Test if regex matches at current cursor position.
#
# The pattern source is wrapped in a non-capturing group before it is anchored, so a
# pattern with a top-level alternation (a|b) is anchored as a whole instead of only
# its first branch. Matching runs against the full source starting at @cursor (\G
# anchors at that start position), which avoids copying the rest of the source into
# a new string on every call. As before, only the pattern's source is used; its
# flags are not carried over.
#
# @param regex [Regexp] pattern to test
# @return [Boolean] true when the pattern matches starting exactly at @cursor
private def matches?(regex)
  /\G(?:#{regex.source})/.match?(@source, @cursor)
end
125
+
126
# Advance past +character+ when it is the next character of the source.
#
# @param character [String] the single character to look for
# @param required [Boolean] raise instead of returning false when it is absent
# @return [Boolean] true if the character was consumed, false otherwise
# @raise [ParseError] if +required+ is set and the character is not at the cursor
private def consume_char(character, required: false)
  unless @source[@cursor] == character
    raise ParseError, "Expected #{character}" if required

    return false
  end

  @cursor += 1
  true
end
137
+
138
# Consume a token (regex) if it matches at the current cursor position.
#
# The pattern source is wrapped in a non-capturing group before it is anchored with
# \G, so a top-level alternation cannot match further along in the source, and the
# match runs against the full source from @cursor instead of a substring copy.
#
# @param regex [Regexp] token pattern
# @param required [Boolean] raise instead of returning false when the token is absent
# @return [Boolean] true if the token was consumed, false otherwise
# @raise [ParseError] if +required+ is set and the token does not match
private def consume_token(regex, required: false)
  if (found = /\G(?:#{regex.source})/.match(@source, @cursor))
    @cursor += found[0].length
    return true
  end

  raise ParseError, "Expected #{regex}" if required

  false
end
149
+
150
# Match a regex at the current cursor position and advance past the match.
#
# The pattern source is wrapped in a non-capturing group (capture numbering is not
# affected) and anchored with \G, so a top-level alternation cannot match further
# along in the source, and no substring copy of the remaining source is made.
#
# @param regex [Regexp] pattern that must match at @cursor
# @return [MatchData] the match; callers read its capture groups
# @raise [ParseError] if the pattern does not match at @cursor
private def match_regex(regex)
  found = /\G(?:#{regex.source})/.match(@source, @cursor)
  raise ParseError, "Expected #{regex}" unless found

  @cursor += found[0].length
  found
end
158
+
159
# Match a regex at the cursor (via match_regex) and return its first capture group.
#
# @param regex [Regexp] pattern with at least one capture group
# @return [String, nil] text of capture group 1
private def match_capture(regex)
  matched = match_regex(regex)
  matched[1]
end
161
+
162
# Parse the body of a message or term whose identifier has already been consumed.
#
# @param id [String] entry identifier; a leading "-" marks a term
# @return [AST::Message, AST::Term] the parsed entry
# @raise [ParseError] if the entry has neither a value nor any attributes
private def parse_message(id)
  value = parse_pattern
  attributes = parse_attributes
  without_attributes = attributes.nil? || attributes.empty?

  raise ParseError, "Expected message value or attributes" if value.nil? && without_attributes

  entry_type = id.start_with?("-") ? AST::Term : AST::Message
  entry_type[id:, value:, attributes:]
end
177
+
178
# Skip blank (spaces and line breaks) and check whether an attribute starts there.
#
# Spaces, LF and CRLF are skipped in any order, so whitespace-only lines between
# the value and an attribute are tolerated. Each iteration either advances the
# cursor or leaves the loop, so mixed line endings such as CRLF followed by LF
# cannot stall the parser. A CR that is not followed by LF is not treated as a
# line break.
#
# @return [Boolean] true with the cursor left on the attribute's "." when an
#   attribute follows; false with the cursor restored otherwise
private def skip_blank_and_check_attribute
  start = @cursor

  loop do
    case @source[@cursor]
    when " ", "\n"
      @cursor += 1
    when "\r"
      # Only CRLF counts as a line break.
      break unless @source[@cursor + 1] == "\n"

      @cursor += 2
    else
      break
    end
  end

  return true if matches?(RE_ATTRIBUTE_START)

  @cursor = start
  false
end
199
+
200
# Parse the attributes (.attr = value) that follow an entry's value.
#
# @return [Hash{String => Object}, nil] attribute patterns keyed by attribute name
#   (a later attribute with the same name replaces an earlier one), or nil when the
#   entry has no attributes
# @raise [ParseError] if an attribute has no value
private def parse_attributes
  collected = {}

  while skip_blank_and_check_attribute
    attribute_name = match_capture(RE_ATTRIBUTE_START)
    pattern = parse_pattern
    raise ParseError, "Expected attribute value" if pattern.nil?

    collected[attribute_name] = pattern
  end

  return nil if collected.empty?

  collected
end
215
+
216
# Parse a pattern: the value of a message, term, attribute or variant.
#
# @return [String, Array, nil] a plain String for single-line text, the result of
#   parse_pattern_elements for patterns with placeables or continuation lines, or
#   nil when there is no pattern at the cursor
private def parse_pattern
  # Text on the same line as the "=" (or variant key), if any.
  leading_text = matches?(RE_TEXT_RUN) ? match_capture(RE_TEXT_RUN) : nil

  # A brace right after the optional leading text: hand over to the element parser,
  # with no indentation observed yet.
  if ["{", "}"].include?(@source[@cursor])
    initial = leading_text ? [leading_text] : []
    return parse_pattern_elements(initial, Float::INFINITY)
  end

  indent = parse_indent

  # No continuation line: the pattern is just the inline text (or absent).
  return leading_text&.sub(RE_TRAILING_SPACES, "") unless indent

  if leading_text
    # Text on first line + indented continuation
    parse_pattern_elements([leading_text, indent], indent[:length])
  else
    # Block pattern starting on new line: the line breaks before it are not content.
    indent[:value] = indent[:value].sub(RE_LEADING_NEWLINES, "")
    parse_pattern_elements([indent], indent[:length])
  end
end
246
+
247
# Parse the blank (line breaks and spaces) in front of a possible continuation line.
#
# Spaces, LF and CRLF are consumed in any order, so whitespace-only lines belong to
# the same blank run as the line breaks around them. Each iteration either advances
# the cursor or leaves the loop, so mixed line endings such as CRLF followed by LF
# cannot stall the parser. A CR that is not followed by LF is not a line break.
#
# The character after the blank decides whether the pattern continues:
# - "}", ".", "[", "*" or end of input: the pattern ends
# - "{": a placeable continues the pattern even without indentation
# - anything else continues the pattern only if at least one space precedes it
#
# @return [Hash, nil] +{value:, length:}+ when the pattern continues: +value+ is the
#   blank with every line break normalised to "\n" and the spaces of whitespace-only
#   lines removed, +length+ is the number of spaces in front of the continuation.
#   nil when the pattern ends; the cursor is then left on the last line break that
#   was consumed (or where it started if there was none), so that the attribute and
#   variant look-ahead helpers start from a single line break.
private def parse_indent
  start = @cursor
  last_break = nil

  loop do
    case @source[@cursor]
    when " "
      @cursor += 1
    when "\n"
      last_break = @cursor
      @cursor += 1
    when "\r"
      # Only CRLF counts as a line break.
      break unless @source[@cursor + 1] == "\n"

      last_break = @cursor
      @cursor += 2
    else
      break
    end
  end

  blank = @source[start...@cursor]

  continues =
    case @source[@cursor]
    when "}", ".", "[", "*", nil
      # Special characters (or end of input) that end patterns
      false
    when "{"
      true
    else
      # Unindented text is most likely the identifier of the next entry.
      blank.end_with?(" ")
    end

  unless continues
    @cursor = last_break || start
    return nil
  end

  {value: blank.gsub(RE_BLANK_LINES, "\n"), length: blank[/ *\z/].length}
end
282
+
283
# Parse the elements of a complex pattern: text runs, placeables and indents.
#
# @param elements [Array] elements collected so far; further elements are appended
# @param common_indent [Numeric] smallest indent seen so far (Float::INFINITY if none)
# @return [Array] the result of dedent_elements for the collected elements
# @raise [ParseError] on a closing brace that does not belong to a placeable
private def parse_pattern_elements(elements, common_indent)
  loop do
    if matches?(RE_TEXT_RUN)
      elements << match_capture(RE_TEXT_RUN)
    elsif @source[@cursor] == "{"
      elements << parse_placeable
    elsif @source[@cursor] == "}"
      raise ParseError, "Unbalanced closing brace"
    elsif (indent = parse_indent)
      elements << indent
      # Track the narrowest indentation; it is what gets stripped from every line.
      common_indent = [common_indent, indent[:length]].min
    else
      break
    end
  end

  # Trim trailing spaces from last text element
  tail = elements.last
  elements[-1] = tail.sub(RE_TRAILING_SPACES, "") if tail.is_a?(String)

  dedent_elements(elements, common_indent)
end
314
+
315
# Dedent pattern elements by the common indent and drop elements that are empty.
#
# Indent elements (Hashes with a :value) are replaced by their dedented text. Empty
# strings are left out, whether they come from dedenting or from a text element that
# consisted only of trailing spaces: an empty String is truthy in Ruby, so it has to
# be filtered explicitly. nil and false elements are left out as well; every other
# element is kept unchanged.
#
# @param elements [Array] text Strings, indent Hashes and expression nodes
# @param common_indent [Numeric] number of spaces to strip after each line break
# @return [Array] the flattened pattern elements
private def dedent_elements(elements, common_indent)
  elements.each_with_object([]) do |element, baked|
    element = safe_dedent(element[:value], common_indent) if element.is_a?(Hash) && element[:value]
    next unless element
    next if element.is_a?(String) && element.empty?

    baked << element
  end
end
330
+
331
# Safely dedent a value by the common indent.
#
# The indent is removed from the end of the value, which is where an indent
# element's spaces are. A value shorter than the indent is returned unchanged;
# this also covers a common indent of Float::INFINITY.
#
# @param value [String] blank text of an indent element
# @param common_indent [Numeric] number of trailing characters to remove
# @return [String] the value without its last +common_indent+ characters
private def safe_dedent(value, common_indent)
  kept_length = value.length - common_indent
  return value if kept_length.negative?

  value[0...kept_length]
end
338
+
339
# Parse a placeable: "{ expression }" or a select expression "{ selector -> variants }".
#
# @return [Object] the inline expression itself, or an AST::SelectExpression when an
#   arrow follows the selector
# @raise [ParseError] if the braces are not opened or closed as expected
private def parse_placeable
  consume_token(TOKEN_BRACE_OPEN, required: true)
  selector = parse_inline_expression

  if consume_token(TOKEN_BRACE_CLOSE)
    selector
  elsif consume_token(TOKEN_ARROW)
    parsed = parse_variants
    consume_token(TOKEN_BRACE_CLOSE, required: true)
    AST::SelectExpression[selector:, variants: parsed[:variants], star: parsed[:star]]
  else
    raise ParseError, "Unclosed placeable"
  end
end
359
+
360
# Parse an inline expression: a nested placeable, a reference or a literal.
#
# @return [Object] the result of parse_placeable or parse_literal, or one of
#   AST::VariableReference, AST::TermReference, AST::FunctionReference and
#   AST::MessageReference
# @raise [ParseError] if a call uses a name that RE_FUNCTION_NAME rejects
private def parse_inline_expression
  return parse_placeable if @source[@cursor] == "{"
  return parse_literal unless matches?(RE_REFERENCE)

  sigil, name, attr = match_regex(RE_REFERENCE).captures
  return AST::VariableReference[name:] if sigil == "$"

  term = sigil == "-"

  # Without an argument list this is a plain term or message reference.
  unless consume_token(TOKEN_PAREN_OPEN)
    return AST::TermReference[name:, attr:, args: []] if term

    return AST::MessageReference[name:, attr:]
  end

  args = parse_arguments
  return AST::TermReference[name:, attr:, args:] if term
  return AST::FunctionReference[name:, args:] if RE_FUNCTION_NAME.match?(name)

  raise ParseError, "Function names must be all upper-case"
end
389
+
390
# Parse the arguments of a function or term call, up to and including the ")".
#
# @return [Array] the parsed arguments in source order
# @raise [ParseError] if the input ends before the closing parenthesis
private def parse_arguments
  args = []

  until @source[@cursor] == ")"
    raise ParseError, "Unclosed argument list" if @source[@cursor].nil?

    args << parse_argument
    # The separator is optional as far as this parser is concerned.
    consume_token(TOKEN_COMMA)
  end

  @cursor += 1 # Skip closing parenthesis
  args
end
407
+
408
# Parse a single positional or named argument.
#
# An expression that parsed as a message reference and is followed by ":" is the
# name of a named argument, whose value must be a literal.
#
# @return [Object] the parsed expression, or an AST::NamedArgument
private def parse_argument
  expression = parse_inline_expression
  named = expression.is_a?(AST::MessageReference) && consume_token(TOKEN_COLON)
  return expression unless named

  AST::NamedArgument[name: expression.name, value: parse_literal]
end
419
+
420
# Skip blank (spaces and line breaks) and check whether a variant starts there.
#
# Spaces, LF and CRLF are skipped in any order, so whitespace-only lines between
# variants are tolerated. Each iteration either advances the cursor or leaves the
# loop, so mixed line endings such as CRLF followed by LF cannot stall the parser.
# A CR that is not followed by LF is not treated as a line break.
#
# @return [Boolean] true with the cursor left on the variant's "*" or "[" when a
#   variant follows; false with the cursor restored otherwise
private def skip_blank_and_check_variant
  start = @cursor

  loop do
    case @source[@cursor]
    when " ", "\n"
      @cursor += 1
    when "\r"
      # Only CRLF counts as a line break.
      break unless @source[@cursor + 1] == "\n"

      @cursor += 2
    else
      break
    end
  end

  return true if matches?(RE_VARIANT_START)

  @cursor = start
  false
end
441
+
442
# Parse the variants of a select expression.
#
# @return [Hash] +{variants:, star:}+ where +variants+ is an Array of AST::Variant and
#   +star+ is the index of the variant marked with "*"; when no variant is present at
#   all the result is +{variants: [], star: nil}+ (tolerated for error recovery)
# @raise [ParseError] if a variant has no value, or variants exist but none is default
private def parse_variants
  variants = []
  default_index = nil

  while skip_blank_and_check_variant
    # The default marker belongs to the variant that is about to be appended.
    default_index = variants.length if consume_char("*")

    key = parse_variant_key
    value = parse_pattern
    raise ParseError, "Expected variant value" if value.nil?

    variants << AST::Variant[key:, value:]
  end

  # Allow empty variants for error recovery (incomplete select expression)
  return {variants: [], star: nil} if variants.empty?

  raise ParseError, "Expected default variant" if default_index.nil?

  {variants:, star: default_index}
end
467
+
468
# Parse a variant key: "[" followed by a number or an identifier, then "]".
#
# @return [Object] the result of parse_number_literal for numeric keys, otherwise an
#   AST::StringLiteral holding the identifier
# @raise [ParseError] if the brackets or the key are missing
private def parse_variant_key
  consume_token(TOKEN_BRACKET_OPEN, required: true)

  key =
    if matches?(RE_NUMBER_LITERAL)
      parse_number_literal
    else
      identifier = match_capture(RE_IDENTIFIER)
      AST::StringLiteral[value: identifier]
    end

  consume_token(TOKEN_BRACKET_CLOSE, required: true)
  key
end
481
+
482
# Parse a number or string literal at the cursor.
#
# @return [Object] the result of parse_number_literal or parse_string_literal
# @raise [ParseError] if neither a number nor a double quote is at the cursor
private def parse_literal
  if matches?(RE_NUMBER_LITERAL)
    parse_number_literal
  elsif @source[@cursor] == '"'
    parse_string_literal
  else
    raise ParseError, "Invalid expression"
  end
end
490
+
491
# Parse a number literal at the cursor.
#
# @return [AST::NumberLiteral] +value+ is the number as a Float, +precision+ the
#   count of digits written after the decimal point (0 when there is none)
# @raise [ParseError] if no number is at the cursor (raised by match_regex)
private def parse_number_literal
  number = match_regex(RE_NUMBER_LITERAL)
  fraction_digits = number[2].to_s

  AST::NumberLiteral[value: Float(number[1]), precision: fraction_digits.length]
end
500
+
501
# Parse a double-quoted string literal; the cursor must be on the opening quote.
#
# @return [AST::StringLiteral] the literal with its escape sequences resolved
# @raise [ParseError] if the literal is not closed before a line break or end of input
private def parse_string_literal
  @cursor += 1 # Skip opening quote
  text = ""

  loop do
    # Plain characters up to the next quote, backslash or line break.
    run = @source[@cursor..].match(/\A#{RE_STRING_RUN.source}/)
    if run
      text += run[0]
      @cursor += run[0].length
    end

    case @source[@cursor]
    when "\\"
      text += parse_escape_sequence
    when '"'
      @cursor += 1 # Skip closing quote
      return AST::StringLiteral[value: text]
    else
      raise ParseError, "Unclosed string literal"
    end
  end
end
525
+
526
# Parse an escape sequence inside a string literal; the cursor must be on the "\".
#
# Both patterns are wrapped in a group and anchored at the cursor with \G. Without
# the group, only the first branch of RE_UNICODE_ESCAPE's alternation would be
# anchored and a "\UHHHHHH" escape further along in the source could be picked up
# while the cursor is on a different escape.
#
# A code point that is not a Unicode scalar value (a surrogate, or a value above
# U+10FFFF) cannot be encoded as valid UTF-8 and is replaced with U+FFFD
# REPLACEMENT CHARACTER.
#
# @return [String] the character the escape sequence stands for
# @raise [ParseError] if the backslash does not start a known escape sequence
private def parse_escape_sequence
  if (unicode = /\G(?:#{RE_UNICODE_ESCAPE.source})/.match(@source, @cursor))
    @cursor += unicode[0].length
    codepoint = (unicode[1] || unicode[2]).to_i(16)
    scalar_value = codepoint <= 0xD7FF || codepoint.between?(0xE000, 0x10FFFF)
    return scalar_value ? [codepoint].pack("U") : "\uFFFD"
  end

  if (simple = /\G(?:#{RE_STRING_ESCAPE.source})/.match(@source, @cursor))
    @cursor += simple[0].length
    return simple[1]
  end

  raise ParseError, "Invalid escape sequence"
end
541
+ end
542
+ end
543
+ end