accept_language 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,74 +1,444 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "bigdecimal"
4
-
5
3
  module AcceptLanguage
6
- # Parses Accept-Language header fields into structured data, extracting language tags
7
- # and their quality values (q-values). Validates input according to RFC 2616 specifications
8
- # and handles edge cases like malformed inputs and implicit quality values.
4
+ # = Accept-Language Header Parser
5
+ #
6
+ # Parser handles the parsing of +Accept-Language+ HTTP header field values
7
+ # as defined in RFC 2616 Section 14.4. It extracts language tags and their
8
+ # associated quality values (q-values), validates them according to the
9
+ # specification, and provides matching capabilities against application-
10
+ # supported languages.
11
+ #
12
+ # == Overview
13
+ #
14
+ # The +Accept-Language+ header field value consists of a comma-separated
15
+ # list of language ranges, each optionally accompanied by a quality value
16
+ # indicating relative preference. This parser:
17
+ #
18
+ # 1. Tokenizes the header into individual language-range entries
19
+ # 2. Extracts and validates language tags per BCP 47
20
+ # 3. Extracts and validates quality values per RFC 2616 Section 3.9
21
+ # 4. Stores valid entries for subsequent matching operations
22
+ #
23
+ # == Quality Values (q-values)
24
+ #
25
+ # Quality values express the user's relative preference for a language.
26
+ # Per RFC 2616 Section 3.9, the syntax is:
27
+ #
28
+ # qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] )
29
+ #
30
+ # This means:
31
+ # - Values range from +0.000+ to +1.000+
32
+ # - Maximum of 3 decimal places
33
+ # - +0+ indicates "not acceptable"
34
+ # - +1+ indicates "most preferred" (default when omitted)
35
+ #
36
+ # Examples of valid q-values: +0+, +0.5+, +0.75+, +0.123+, +1+, +1.0+, +1.000+
37
+ #
38
+ # Examples of invalid q-values (silently ignored): +1.5+, +0.1234+, +-0.5+, +.5+
39
+ #
40
+ # == Language Tags
41
+ #
42
+ # Language tags follow the BCP 47 specification (RFC 5646), which supersedes
43
+ # the RFC 1766 reference in RFC 2616 Section 3.10. Valid tags consist of:
44
+ #
45
+ # - A primary subtag of 1-8 alphabetic characters (e.g., +en+, +zh+, +ast+)
46
+ # - Zero or more subtags of 1-8 alphanumeric characters, separated by hyphens
47
+ # - The special wildcard tag +*+ (matches any language)
48
+ #
49
+ # Examples of valid language tags:
50
+ # - +en+ (English)
51
+ # - +en-US+ (English, United States)
52
+ # - +zh-Hant-TW+ (Chinese, Traditional script, Taiwan)
53
+ # - +de-CH-1996+ (German, Switzerland, 1996 orthography)
54
+ # - +sr-Latn+ (Serbian, Latin script)
55
+ # - +*+ (wildcard)
56
+ #
57
+ # == Internal Representation
58
+ #
59
+ # Internally, quality values are stored as integers in the range 0-1000
60
+ # (multiplied by 1000) to avoid floating-point comparison issues. This is
61
+ # an implementation detail and does not affect the public API.
62
+ #
63
+ # == Thread Safety
64
+ #
65
+ # Parser does not mutate its own state after initialization. Note that the
66
+ # +languages_range+ hash is not frozen, so treat it as read-only when sharing.
67
+ #
68
+ # == Error Handling
69
+ #
70
+ # The parser is lenient by design to handle real-world headers that may
71
+ # not strictly conform to specifications:
72
+ #
73
+ # - Invalid language tags are silently skipped
74
+ # - Invalid quality values cause the entry to be skipped
75
+ # - Empty or +nil+ input results in an empty languages_range
76
+ # - Malformed entries (missing separators, etc.) are skipped
77
+ #
78
+ # However, the parser is strict about input types: only +String+ or +nil+
79
+ # are accepted for the +field+ parameter.
80
+ #
81
+ # @example Basic usage
82
+ # parser = AcceptLanguage::Parser.new("da, en-GB;q=0.8, en;q=0.7")
83
+ # parser.match(:en, :da)
84
+ # # => :da
85
+ #
86
+ # @example Inspecting parsed languages
87
+ # parser = AcceptLanguage::Parser.new("fr-CH;q=0.9, fr;q=0.8, en;q=0.7")
88
+ # parser.languages_range
89
+ # # => {"fr-ch"=>900, "fr"=>800, "en"=>700}
90
+ #
91
+ # @example Handling wildcards
92
+ # parser = AcceptLanguage::Parser.new("de, *;q=0.5")
93
+ # parser.match(:ja, :de)
94
+ # # => :de
9
95
  #
10
- # @example
11
- # parser = Parser.new("da, en-GB;q=0.8, en;q=0.7")
12
- # parser.match(:en, :da) # => :da
96
+ # @example Handling exclusions
97
+ # parser = AcceptLanguage::Parser.new("*, en;q=0")
98
+ # parser.match(:en, :fr)
99
+ # # => :fr
13
100
  #
14
- # @see https://tools.ietf.org/html/rfc2616#section-14.4
101
+ # @see AcceptLanguage.parse
102
+ # @see Matcher
103
+ # @see https://tools.ietf.org/html/rfc2616#section-14.4 RFC 2616 Section 14.4
104
+ # @see https://tools.ietf.org/html/rfc2616#section-3.9 RFC 2616 Section 3.9 (qvalue)
105
+ # @see https://tools.ietf.org/html/bcp47 BCP 47
15
106
  class Parser
107
+ # Default quality value (1.0) scaled to internal integer representation.
108
+ #
109
+ # When a language tag appears without an explicit quality value, it is
110
+ # assigned this default value, indicating maximum preference.
111
+ #
16
112
  # @api private
17
- DEFAULT_QUALITY = "1"
113
+ # @return [Integer] 1000 (representing q=1.0)
114
+ DEFAULT_QUALITY = 1_000
115
+
116
+ # The ASCII digit zero character, used in quality value parsing.
117
+ #
118
+ # @api private
119
+ # @return [String] "0"
120
+ DIGIT_ZERO = "0"
121
+
122
+ # The decimal point character, used in quality value parsing.
123
+ #
124
+ # @api private
125
+ # @return [String] "."
126
+ DOT = "."
127
+
128
+ # Error message raised when +field+ argument is not a String or nil.
129
+ #
130
+ # This guards against accidental non-String values being passed to the
131
+ # parser, which would cause unexpected behavior during parsing.
132
+ #
133
+ # @api private
134
+ # @return [String]
135
+ FIELD_TYPE_ERROR = "Field must be a String or nil"
136
+
137
+ # The comma character that separates language-range entries in the
138
+ # Accept-Language header field value.
139
+ #
18
140
  # @api private
141
+ # @return [String] ","
19
142
  SEPARATOR = ","
143
+
144
+ # The space character, stripped during parsing as whitespace around
145
+ # separators is optional per RFC 2616.
146
+ #
20
147
  # @api private
148
+ # @return [String] " "
21
149
  SPACE = " "
150
+
151
+ # The suffix that precedes quality values in language-range entries.
152
+ # A language entry with a quality value has the form: +langtag;q=qvalue+
153
+ #
22
154
  # @api private
155
+ # @return [String] ";q="
23
156
  SUFFIX = ";q="
157
+
158
+ # Regular expression pattern for validating quality values.
159
+ #
160
+ # Based on the RFC 2616 Section 3.9 qvalue syntax, except that a decimal point must be followed by at least one digit:
161
+ #
162
+ # qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] )
163
+ #
164
+ # This pattern accepts:
165
+ # - +0+ or +1+ (integer form)
166
+ # - +0.+ followed by 1-3 digits (e.g., +0.5+, +0.75+, +0.123+)
167
+ # - +1.+ followed by 1-3 zeros (e.g., +1.0+, +1.00+, +1.000+)
168
+ #
24
169
  # @api private
25
- QVALUE_PATTERN = /\A(?:0(?:\.[0-9]{1,3})?|1(?:\.0{1,3})?|\.[0-9]{1,3})\z/
170
+ # @return [Regexp]
171
+ #
172
+ # @example Valid matches
173
+ # QVALUE_PATTERN.match?("0") # => true
174
+ # QVALUE_PATTERN.match?("0.5") # => true
175
+ # QVALUE_PATTERN.match?("0.123") # => true
176
+ # QVALUE_PATTERN.match?("1") # => true
177
+ # QVALUE_PATTERN.match?("1.0") # => true
178
+ # QVALUE_PATTERN.match?("1.000") # => true
179
+ #
180
+ # @example Invalid (no match)
181
+ # QVALUE_PATTERN.match?("0.1234") # => false (too many decimals)
182
+ # QVALUE_PATTERN.match?("1.5") # => false (> 1)
183
+ # QVALUE_PATTERN.match?("2") # => false (> 1)
184
+ # QVALUE_PATTERN.match?(".5") # => false (missing leading digit)
185
+ # QVALUE_PATTERN.match?("1.001") # => false (1.x must be zeros only)
186
+ QVALUE_PATTERN = /\A(?:0(?:\.[0-9]{1,3})?|1(?:\.0{1,3})?)\z/
187
+
188
+ # Regular expression pattern for validating language tags.
189
+ #
190
+ # Supports BCP 47 (RFC 5646) language tags, which supersede the RFC 1766
191
+ # tags referenced in RFC 2616 Section 3.10.
192
+ #
193
+ # == Pattern Structure
194
+ #
195
+ # The pattern accepts either:
196
+ # - The wildcard character +*+
197
+ # - A primary subtag (1-8 ALPHA) followed by zero or more subtags
198
+ # (each 1-8 ALPHANUM, preceded by a hyphen)
199
+ #
200
+ # == BCP 47 vs RFC 1766
201
+ #
202
+ # RFC 2616 Section 3.10 references RFC 1766, which only allowed alphabetic
203
+ # characters in subtags. However, BCP 47 (the current standard) permits
204
+ # alphanumeric subtags to support:
205
+ #
206
+ # - Year-based variant subtags (e.g., +1996+ in +de-CH-1996+)
207
+ # - Numeric region codes (e.g., +419+ for Latin America)
208
+ # - Script subtags with numbers (rare but valid)
209
+ #
210
+ # This implementation follows BCP 47 for maximum compatibility with
211
+ # modern language tags.
212
+ #
26
213
  # @api private
214
+ # @return [Regexp]
215
+ #
216
+ # @example Valid language tags
217
+ # LANGTAG_PATTERN.match?("en") # => true
218
+ # LANGTAG_PATTERN.match?("en-US") # => true
219
+ # LANGTAG_PATTERN.match?("zh-Hant-TW") # => true
220
+ # LANGTAG_PATTERN.match?("de-CH-1996") # => true
221
+ # LANGTAG_PATTERN.match?("*") # => true
222
+ #
223
+ # @example Invalid language tags
224
+ # LANGTAG_PATTERN.match?("") # => false (empty)
225
+ # LANGTAG_PATTERN.match?("toolongprimary") # => false (> 8 chars)
226
+ # LANGTAG_PATTERN.match?("en_US") # => false (underscore)
227
+ # LANGTAG_PATTERN.match?("123") # => false (numeric primary)
27
228
  LANGTAG_PATTERN = /\A(?:\*|[a-zA-Z]{1,8}(?:-[a-zA-Z0-9]{1,8})*)\z/
28
229
 
230
+ # The parsed language preferences extracted from the Accept-Language header.
231
+ #
232
+ # This hash maps downcased language tags to their quality values (scaled
233
+ # to integers 0-1000). Tags are stored in lowercase for case-insensitive
234
+ # matching.
235
+ #
29
236
  # @api private
30
- # @return [Hash<String, BigDecimal>] Parsed language tags and their quality values
237
+ # @return [Hash{String => Integer}] language tags mapped to quality values
238
+ #
239
+ # @example
240
+ # parser = Parser.new("en-GB;q=0.8, fr;q=0.9, de")
241
+ # parser.languages_range
242
+ # # => {"en-gb"=>800, "fr"=>900, "de"=>1000}
31
243
  attr_reader :languages_range
32
244
 
33
- # Initializes a new Parser instance by importing and processing the given Accept-Language header field.
245
+ # Creates a new Parser instance by parsing the given Accept-Language
246
+ # header field value.
34
247
  #
35
- # @param field [String] The Accept-Language header field to parse.
248
+ # The parser extracts all valid language-range entries from the header,
249
+ # validates their language tags and quality values, and stores them for
250
+ # subsequent matching operations.
251
+ #
252
+ # == Parsing Process
253
+ #
254
+ # 1. Validate that input is a String or nil
255
+ # 2. Convert nil to empty string
256
+ # 3. Normalize to lowercase for case-insensitive matching
257
+ # 4. Remove all space characters (optional whitespace per RFC 2616; tabs are not stripped)
258
+ # 5. Split on commas to get individual entries
259
+ # 6. For each entry:
260
+ # a. Split on +;q=+ to separate tag from quality
261
+ # b. Validate the language tag
262
+ # c. Validate and parse the quality value (default 1.0 if absent)
263
+ # d. Store valid entries in the languages_range hash
264
+ #
265
+ # @param field [String, nil] the Accept-Language header field value.
266
+ # Common sources include +request.env["HTTP_ACCEPT_LANGUAGE"]+ in Rack
267
+ # applications or +request.headers["Accept-Language"]+ in Rails.
268
+ # When +nil+ is passed (header absent), it is treated as an empty string.
269
+ #
270
+ # @raise [TypeError] if +field+ is neither a String nor nil
271
+ #
272
+ # @example Standard header
273
+ # Parser.new("en-US, en;q=0.9, fr;q=0.8")
274
+ #
275
+ # @example With wildcard
276
+ # Parser.new("fr-FR, fr;q=0.9, *;q=0.5")
277
+ #
278
+ # @example With exclusion
279
+ # Parser.new("*, en;q=0")
280
+ #
281
+ # @example Empty or nil input
282
+ # Parser.new("") # languages_range => {}
283
+ # Parser.new(nil) # languages_range => {}
284
+ #
285
+ # @example Malformed input (invalid entries skipped)
286
+ # Parser.new("en, invalid;;q=0.5, fr;q=0.8")
287
+ # # languages_range => {"en"=>1000, "fr"=>800}
288
+ #
289
+ # @see #languages_range
36
290
  def initialize(field)
291
+ raise ::TypeError, FIELD_TYPE_ERROR unless field.nil? || field.is_a?(::String)
292
+
37
293
  @languages_range = import(field)
38
294
  end
39
295
 
40
- # Finds the best matching language from available options based on user preferences.
41
- # Considers quality values and language tag specificity (e.g., "en-US" vs "en").
296
+ # Finds the best matching language from the available options based on
297
+ # the user's preferences expressed in the Accept-Language header.
298
+ #
299
+ # This method delegates to {Matcher} to perform the actual matching,
300
+ # which considers:
301
+ #
302
+ # 1. **Quality values**: Higher q-values indicate stronger preference
303
+ # 2. **Declaration order**: When q-values are equal, earlier declaration wins
304
+ # 3. **Prefix matching**: +en+ matches +en-US+, +en-GB+, etc.
305
+ # 4. **Wildcards**: +*+ matches any language not explicitly listed
306
+ # 5. **Exclusions**: +q=0+ explicitly excludes a language
42
307
  #
43
- # @param available_langtags [Array<String, Symbol>] Languages supported by your application
44
- # @return [String, Symbol, nil] Best matching language tag or nil if no match found
308
+ # == Matching Algorithm
45
309
  #
46
- # @example Match against specific language options
47
- # parser.match("en", "fr", "de") # => "en" if English is preferred
48
- # @example Match with region-specific tags
49
- # parser.match("en-US", "en-GB", "fr") # => "en-GB" if British English is preferred
310
+ # 1. Remove any available languages that are explicitly excluded (+q=0+)
311
+ # 2. Iterate through preferred languages in descending quality order
312
+ # 3. For each preferred language, find the first available language that:
313
+ # - Exactly matches the preferred tag, OR
314
+ # - Has the preferred tag as a prefix (followed by a hyphen)
315
+ # 4. For wildcards, match any available language not already matched
316
+ # 5. Return the first match found, or +nil+ if no match exists
317
+ #
318
+ # == Return Value Preservation
319
+ #
320
+ # The method returns the language tag exactly as provided in the
321
+ # +available_langtags+ argument, preserving the original case. This is
322
+ # important for direct use with +I18n.locale+ and similar APIs.
323
+ #
324
+ # @param available_langtags [Array<Symbol>] the languages your
325
+ # application supports. These are typically your +I18n.available_locales+
326
+ # or a similar list.
327
+ #
328
+ # @return [Symbol, nil] the best matching language tag from the
329
+ # available options, in its original form as passed to this method.
330
+ # Returns +nil+ if no acceptable match is found.
331
+ #
332
+ # @raise [TypeError] if any element in +available_langtags+ is not a Symbol
333
+ #
334
+ # @example Basic matching
335
+ # parser = Parser.new("da, en-GB;q=0.8, en;q=0.7")
336
+ # parser.match(:en, :da)
337
+ # # => :da
338
+ #
339
+ # @example Regional variant matching
340
+ # parser = Parser.new("en-GB, en;q=0.9")
341
+ # parser.match(:en, :"en-GB", :"en-US")
342
+ # # => :"en-GB"
343
+ #
344
+ # @example Prefix matching
345
+ # parser = Parser.new("en")
346
+ # parser.match(:"en-US", :"en-GB")
347
+ # # => :"en-US" (first match wins)
348
+ #
349
+ # @example No match found
350
+ # parser = Parser.new("ja, zh")
351
+ # parser.match(:en, :fr, :de)
352
+ # # => nil
353
+ #
354
+ # @example Wildcard matching
355
+ # parser = Parser.new("en, *;q=0.5")
356
+ # parser.match(:fr)
357
+ # # => :fr (matched by wildcard)
358
+ #
359
+ # @example Exclusion
360
+ # parser = Parser.new("*, en;q=0")
361
+ # parser.match(:en, :fr)
362
+ # # => :fr (en is excluded)
363
+ #
364
+ # @example With I18n
365
+ # parser = Parser.new(request.env["HTTP_ACCEPT_LANGUAGE"])
366
+ # locale = parser.match(*I18n.available_locales) || I18n.default_locale
367
+ # I18n.locale = locale
368
+ #
369
+ # @see Matcher
370
+ # @see https://tools.ietf.org/html/rfc2616#section-14.4 RFC 2616 Section 14.4
50
371
  def match(*available_langtags)
51
372
  Matcher.new(**languages_range).call(*available_langtags)
52
373
  end
53
374
 
54
375
  private
55
376
 
377
+ # Parses the Accept-Language header field value into a hash of language
378
+ # tags and their quality values.
379
+ #
380
+ # @param field [String, nil] the raw header field value
381
+ # @return [Hash{String => Integer}] downcased language tags mapped to
382
+ # quality values (0-1000)
56
383
  def import(field)
57
- "#{field}".delete(SPACE).split(SEPARATOR).inject({}) do |hash, lang|
384
+ "#{field}".downcase.delete(SPACE).split(SEPARATOR).each_with_object({}) do |lang, hash|
58
385
  tag, quality = lang.split(SUFFIX)
59
- next hash unless valid_tag?(tag)
386
+ next unless valid_tag?(tag)
60
387
 
61
- quality = DEFAULT_QUALITY if quality.nil?
62
- next hash unless valid_quality?(quality)
388
+ quality_value = parse_quality(quality)
389
+ next if quality_value.nil?
63
390
 
64
- hash.merge(tag => BigDecimal(quality))
391
+ hash[tag] = quality_value
65
392
  end
66
393
  end
67
394
 
395
+ # Parses and validates a quality value string.
396
+ #
397
+ # @param quality [String, nil] the quality value string (without the ";q=" prefix)
398
+ # @return [Integer, nil] the quality value scaled to 0-1000, or nil if invalid
399
+ def parse_quality(quality)
400
+ return DEFAULT_QUALITY if quality.nil?
401
+ return unless valid_quality?(quality)
402
+
403
+ qvalue_to_integer(quality)
404
+ end
405
+
406
+ # Converts a validated qvalue string to an integer in the range 0-1000.
407
+ #
408
+ # The conversion algorithm:
409
+ # 1. Remove the decimal point (if present)
410
+ # 2. Pad with zeros on the right to 4 characters
411
+ # 3. Convert to integer
412
+ #
413
+ # This effectively multiplies the decimal value by 1000, avoiding
414
+ # floating-point arithmetic entirely.
415
+ #
416
+ # @param quality [String] a validated qvalue string (e.g., "1", "0.8", "0.123")
417
+ # @return [Integer] the quality value scaled to 0-1000
418
+ #
419
+ # @example Conversion examples
420
+ # qvalue_to_integer("1") # => 1000 ("1" -> "1000" -> 1000)
421
+ # qvalue_to_integer("1.0") # => 1000 ("10" -> "1000" -> 1000)
422
+ # qvalue_to_integer("0.8") # => 800 ("08" -> "0800" -> 800)
423
+ # qvalue_to_integer("0.85") # => 850 ("085" -> "0850" -> 850)
424
+ # qvalue_to_integer("0.123") # => 123 ("0123" -> "0123" -> 123)
425
+ # qvalue_to_integer("0") # => 0 ("0" -> "0000" -> 0)
426
+ def qvalue_to_integer(quality)
427
+ quality.delete(DOT).ljust(4, DIGIT_ZERO).to_i
428
+ end
429
+
430
+ # Validates a quality value string against RFC 2616 Section 3.9.
431
+ #
432
+ # @param quality [String] the quality value to validate
433
+ # @return [Boolean] true if the quality value is valid
68
434
  def valid_quality?(quality)
69
435
  quality.match?(QVALUE_PATTERN)
70
436
  end
71
437
 
438
+ # Validates a language tag against BCP 47.
439
+ #
440
+ # @param tag [String, nil] the language tag to validate
441
+ # @return [Boolean] true if the tag is valid (including wildcard)
72
442
  def valid_tag?(tag)
73
443
  return false if tag.nil?
74
444