i18n-message_format 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,328 @@
1
+ # frozen_string_literal: true
2
+
3
+ module I18n
4
+ module MessageFormat
5
# Error raised by {Parser} when a message format pattern is syntactically
# invalid.
class ParseError < Error
  # Zero-based character offset into the pattern at which parsing failed,
  # or +nil+ when no position was supplied.
  #
  # @return [Integer, nil]
  attr_reader :position

  # Builds the error. When a position is given it is appended to the
  # message as " at position N".
  #
  # @param message [String] human-readable description of the parse error
  # @param position [Integer, nil] zero-based position in the pattern string
  def initialize(message, position = nil)
    @position = position

    full_message = message
    full_message = "#{message} at position #{position}" if position
    super(full_message)
  end
end
19
+
20
# Parses an ICU MessageFormat pattern string into an AST.
#
# The parser understands the following ICU constructs:
#
# * Simple argument: +{name}+
# * Number format:   +{name, number}+ / +{name, number, style}+
# * Date format:     +{name, date}+ / +{name, date, style}+
# * Time format:     +{name, time}+ / +{name, time, style}+
# * Plural:          +{name, plural, one {…} other {…}}+
# * Select:          +{name, select, foo {…} other {…}}+
# * Select ordinal:  +{name, selectordinal, one {…} other {…}}+
# * Quoted literals using single quotes (+'+)
#
# Quoting rules implemented here:
#
# * +''+ anywhere produces a single literal apostrophe.
# * +'{+ or +'}+ opens a quoted section that runs to the next lone +'+
#   (or to the end of the pattern); inside it, +''+ is a literal apostrophe.
# * Any other +'+ is kept as a literal apostrophe.
#
# The result of {#parse} is an array of {Nodes} objects. Plural, select and
# selectordinal nodes carry a Hash of branches whose values are themselves
# arrays of nodes.
#
# @example
#   nodes = I18n::MessageFormat::Parser.new("Hello, {name}!").parse
#   # => [#<TextNode value="Hello, ">, #<ArgumentNode name="name">, #<TextNode value="!">]
class Parser
  # Literal prefix that introduces the optional offset of a plural-style argument.
  OFFSET_PREFIX = "offset:"
  private_constant :OFFSET_PREFIX

  # Creates a new parser for the given pattern string.
  #
  # @param pattern [String] the ICU MessageFormat pattern to parse
  def initialize(pattern)
    @pattern = pattern
    @pos = 0
  end

  # Parses the pattern and returns its AST representation.
  #
  # @return [Array<Nodes::TextNode, Nodes::ArgumentNode, Nodes::NumberFormatNode,
  #   Nodes::DateFormatNode, Nodes::TimeFormatNode, Nodes::PluralNode,
  #   Nodes::SelectNode, Nodes::SelectOrdinalNode>]
  # @raise [ParseError] if the pattern contains a syntax error
  def parse
    parse_message
  end

  private

  # Parses a run of text, quotes and arguments until end of input or until
  # a character contained in +terminate_on+ is reached (the terminator is
  # left unconsumed). Adjacent text nodes are merged before returning.
  def parse_message(terminate_on = nil)
    nodes = []

    until eof?
      char = current_char
      break if terminate_on&.include?(char)

      case char
      when "{"
        @pos += 1
        nodes << parse_argument
      when "}"
        raise ParseError.new("Unexpected }", @pos)
      when "'"
        nodes << parse_quoted_or_literal
      else
        nodes << parse_text(terminate_on)
      end
    end

    merge_adjacent_text(nodes)
  end

  # Consumes plain text up to the next brace, quote, terminator or end of input.
  def parse_text(terminate_on = nil)
    start = @pos
    @pos += 1 until eof? || text_boundary?(current_char, terminate_on)
    Nodes::TextNode.new(@pattern[start...@pos])
  end

  # True when +char+ ends a plain-text run.
  def text_boundary?(char, terminate_on)
    char == "{" || char == "}" || char == "'" || terminate_on&.include?(char)
  end

  # Handles an apostrophe at the current position; see the class docs for
  # the quoting rules. Always returns a TextNode.
  def parse_quoted_or_literal
    @pos += 1 # skip opening quote

    if !eof? && current_char == "'"
      # '' => literal single quote
      @pos += 1
      return Nodes::TextNode.new("'")
    end

    # A trailing quote, or one not followed by a brace, is an ordinary character.
    return Nodes::TextNode.new("'") if eof? || (current_char != "{" && current_char != "}")

    Nodes::TextNode.new(read_quoted_section)
  end

  # Reads the body of a quoted section (positioned just after the opening
  # quote). A doubled quote yields one apostrophe and keeps the section
  # open; a lone quote closes it. An unterminated section runs to the end.
  def read_quoted_section
    text = +""

    until eof?
      char = current_char
      @pos += 1

      if char != "'"
        text << char
      elsif !eof? && current_char == "'"
        text << "'"
        @pos += 1
      else
        break # closing quote consumed
      end
    end

    text
  end

  # Parses the inside of +{...}+; the opening brace is already consumed.
  def parse_argument
    skip_whitespace
    name = parse_identifier
    skip_whitespace

    raise ParseError.new("Unclosed argument", @pos) if eof?

    case current_char
    when "}"
      @pos += 1
      Nodes::ArgumentNode.new(name)
    when ","
      @pos += 1
      skip_whitespace
      parse_typed_argument(name)
    else
      raise ParseError.new("Expected ',' or '}' in argument", @pos)
    end
  end

  # Dispatches on the argument type keyword that follows the first comma.
  def parse_typed_argument(name)
    type_start = @pos
    type = parse_identifier
    skip_whitespace

    case type
    when "number"        then parse_number_arg(name)
    when "date"          then parse_date_arg(name)
    when "time"          then parse_time_arg(name)
    when "plural"        then parse_plural_arg(name)
    when "select"        then parse_select_arg(name)
    when "selectordinal" then parse_select_ordinal_arg(name)
    else
      # Report where the offending keyword starts, not where scanning stopped.
      raise ParseError.new("Unknown argument type '#{type}'", type_start)
    end
  end

  def parse_number_arg(name)
    parse_simple_format(name, Nodes::NumberFormatNode)
  end

  def parse_date_arg(name)
    parse_simple_format(name, Nodes::DateFormatNode)
  end

  def parse_time_arg(name)
    parse_simple_format(name, Nodes::TimeFormatNode)
  end

  # Shared tail of number/date/time arguments: either an immediate +}+ or
  # +, style}+. Builds +node_class+ with the name and, if present, the style.
  def parse_simple_format(name, node_class)
    if current_char == "}"
      @pos += 1
      return node_class.new(name)
    end

    expect(",")
    skip_whitespace
    style = parse_identifier
    skip_whitespace
    expect("}")
    node_class.new(name, style)
  end

  def parse_plural_arg(name)
    offset, branches = parse_offset_and_branches
    Nodes::PluralNode.new(name, branches, offset)
  end

  def parse_select_arg(name)
    expect(",")
    skip_whitespace
    branches = parse_branches
    expect("}")
    Nodes::SelectNode.new(name, branches)
  end

  def parse_select_ordinal_arg(name)
    offset, branches = parse_offset_and_branches
    Nodes::SelectOrdinalNode.new(name, branches, offset)
  end

  # Shared tail of plural/selectordinal arguments.
  #
  # @return [Array(Integer, Hash)] the offset (0 when absent) and the branches
  def parse_offset_and_branches
    expect(",")
    skip_whitespace
    offset = parse_optional_offset
    branches = parse_branches
    expect("}")
    [offset, branches]
  end

  # Parses an optional +offset:N+ clause; returns 0 when none is present.
  def parse_optional_offset
    # Compare a fixed-width slice instead of copying the rest of the pattern.
    return 0 unless @pattern[@pos, OFFSET_PREFIX.length] == OFFSET_PREFIX

    @pos += OFFSET_PREFIX.length
    skip_whitespace
    offset = parse_number
    skip_whitespace
    offset
  end

  # Parses +key {message}+ pairs until the argument's closing brace (left
  # unconsumed) or end of input. A repeated key overwrites the earlier one.
  def parse_branches
    branches = {}

    loop do
      skip_whitespace
      break if eof? || current_char == "}"

      key = parse_branch_key
      skip_whitespace
      expect("{")
      branches[key] = parse_message("}")
      expect("}")
    end

    branches
  end

  # A branch key is either +=N+ (explicit number) or an identifier keyword.
  def parse_branch_key
    return parse_identifier.to_sym unless current_char == "="

    @pos += 1
    :"=#{parse_number}"
  end

  def parse_identifier
    start = @pos
    @pos += 1 while !eof? && identifier_char?(current_char)
    raise ParseError.new("Expected identifier", start) if @pos == start

    @pattern[start...@pos]
  end

  # Parses an optionally negative integer. At least one digit is required,
  # so a lone "-" is rejected rather than read as 0.
  def parse_number
    start = @pos
    @pos += 1 if !eof? && current_char == "-"
    digits_start = @pos
    @pos += 1 while !eof? && current_char.match?(/[0-9]/)
    raise ParseError.new("Expected number", start) if @pos == digits_start

    @pattern[start...@pos].to_i
  end

  def identifier_char?(char)
    char.match?(/[a-zA-Z0-9_]/)
  end

  def skip_whitespace
    @pos += 1 while !eof? && current_char.match?(/\s/)
  end

  # Consumes +char+ or raises if the next character is anything else.
  def expect(char)
    raise ParseError.new("Expected '#{char}'", @pos) if eof? || current_char != char

    @pos += 1
  end

  def current_char
    @pattern[@pos]
  end

  def eof?
    @pos >= @pattern.length
  end

  # Collapses runs of consecutive TextNodes into the first node of each run.
  def merge_adjacent_text(nodes)
    nodes.each_with_object([]) do |node, merged|
      previous = merged.last
      if node.is_a?(Nodes::TextNode) && previous.is_a?(Nodes::TextNode)
        previous.value = previous.value + node.value
      else
        merged << node
      end
    end
  end
end
327
+ end
328
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module I18n
  module MessageFormat
    # Version string of the i18n-message_format gem.
    VERSION = "0.1.0"
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "message_format/cache"
4
+ require_relative "message_format/nodes"
5
+ require_relative "message_format/version"
6
+
7
module I18n
  # Root namespace of the i18n-message_format gem.
  module MessageFormat
    # Common ancestor of every error this gem raises, so callers can rescue
    # all of them with a single +rescue I18n::MessageFormat::Error+.
    class Error < StandardError
    end
  end
end
14
+
15
+ require_relative "message_format/parser"
16
+ require_relative "message_format/formatter"
17
+ require_relative "message_format/backend"
18
+ require_relative "message_format/ordinal_rules"
19
+
20
module I18n
  # Provides ICU message format support for the I18n gem.
  #
  # Patterns follow the ICU MessageFormat syntax and support simple argument
  # interpolation as well as `plural`, `select`, and `selectordinal` constructs.
  #
  # @example Simple argument interpolation
  #   I18n::MessageFormat.format("Hello, {name}!", name: "world")
  #   # => "Hello, world!"
  #
  # @example Plural
  #   I18n::MessageFormat.format(
  #     "{count, plural, one {# item} other {# items}}",
  #     count: 3
  #   )
  #   # => "3 items"
  module MessageFormat
    # Module-level cache of parsed patterns, shared by every call to {.format}.
    @cache = Cache.new

    class << self
      # Formats an ICU message format pattern with the given arguments.
      #
      # Parsed ASTs are memoized in an internal cache keyed by the pattern
      # string, so a pattern is only parsed when the cache has no entry for it.
      #
      # Arguments may be supplied as a positional hash, as keyword arguments,
      # or both. When both are given they are combined, and on a key that
      # appears in both the positional hash takes precedence.
      #
      # @param pattern [String] an ICU MessageFormat pattern string
      # @param arguments [Hash] a hash of argument names (Symbol or String keys)
      #   to their values. May be omitted in favour of keyword arguments.
      # @param locale [Symbol, String] the locale handed to the formatter.
      #   Defaults to {I18n.locale}.
      # @param kwargs [Hash] additional arguments given as keywords
      # @return [String] the formatted message
      # @raise [ParseError] if +pattern+ contains a syntax error
      # @raise [MissingArgumentError] if a placeholder in +pattern+ has no
      #   corresponding argument (NOTE(review): raised by Formatter, which is
      #   not visible here — confirm)
      def format(pattern, arguments = {}, locale: ::I18n.locale, **kwargs)
        # Previously keywords were dropped whenever a non-empty hash was also
        # passed; merge them instead, letting the explicit hash win so that
        # calls which already worked keep producing the same values.
        arguments = kwargs.merge(arguments) unless kwargs.empty?

        nodes = @cache.fetch(pattern) { Parser.new(pattern).parse }
        Formatter.new(nodes, arguments, locale).format
      end

      # Clears the internal parse-result cache.
      #
      # Useful in tests or whenever you need to reclaim memory.
      #
      # @return [void]
      def clear_cache!
        @cache.clear
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ module I18n
2
+ module MessageFormat
3
+ VERSION: String
4
+ # Type signatures for I18n::MessageFormat. See the RBS writing guides: https://github.com/ruby/rbs#guides
5
+ end
6
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: i18n-message_format
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Chris Fung
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-01 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: i18n
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '1.0'
26
+ description: A pure Ruby implementation of ICU Message Format that integrates with
27
+ the ruby-i18n gem via a chainable backend.
28
+ email:
29
+ - aergonaut@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".envrc"
35
+ - CHANGELOG.md
36
+ - CODE_OF_CONDUCT.md
37
+ - LICENSE.txt
38
+ - README.md
39
+ - Rakefile
40
+ - devenv.lock
41
+ - devenv.nix
42
+ - devenv.yaml
43
+ - docs/plans/2026-02-25-icu-message-format-design.md
44
+ - docs/plans/2026-02-25-icu-message-format-plan.md
45
+ - lib/i18n/message_format.rb
46
+ - lib/i18n/message_format/backend.rb
47
+ - lib/i18n/message_format/cache.rb
48
+ - lib/i18n/message_format/formatter.rb
49
+ - lib/i18n/message_format/nodes.rb
50
+ - lib/i18n/message_format/ordinal_rules.rb
51
+ - lib/i18n/message_format/parser.rb
52
+ - lib/i18n/message_format/version.rb
53
+ - sig/i18n/message_format.rbs
54
+ homepage: https://github.com/aergonaut/i18n-message_format
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/aergonaut/i18n-message_format
59
+ source_code_uri: https://github.com/aergonaut/i18n-message_format
60
+ changelog_uri: https://github.com/aergonaut/i18n-message_format/blob/main/CHANGELOG.md
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 3.2.0
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubygems_version: 3.7.2
76
+ specification_version: 4
77
+ summary: ICU Message Format support for Ruby i18n
78
+ test_files: []