accept_language 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +110 -40
- data/lib/accept_language/matcher.rb +315 -35
- data/lib/accept_language/parser.rb +394 -37
- data/lib/accept_language.rb +252 -12
- metadata +10 -20
|
@@ -1,87 +1,444 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "bigdecimal"
|
|
4
|
-
|
|
5
3
|
module AcceptLanguage
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
4
|
+
# = Accept-Language Header Parser
|
|
5
|
+
#
|
|
6
|
+
# Parser handles the parsing of +Accept-Language+ HTTP header field values
|
|
7
|
+
# as defined in RFC 2616 Section 14.4. It extracts language tags and their
|
|
8
|
+
# associated quality values (q-values), validates them according to the
|
|
9
|
+
# specification, and provides matching capabilities against application-
|
|
10
|
+
# supported languages.
|
|
11
|
+
#
|
|
12
|
+
# == Overview
|
|
13
|
+
#
|
|
14
|
+
# The +Accept-Language+ header field value consists of a comma-separated
|
|
15
|
+
# list of language ranges, each optionally accompanied by a quality value
|
|
16
|
+
# indicating relative preference. This parser:
|
|
17
|
+
#
|
|
18
|
+
# 1. Tokenizes the header into individual language-range entries
|
|
19
|
+
# 2. Extracts and validates language tags per BCP 47
|
|
20
|
+
# 3. Extracts and validates quality values per RFC 2616 Section 3.9
|
|
21
|
+
# 4. Stores valid entries for subsequent matching operations
|
|
22
|
+
#
|
|
23
|
+
# == Quality Values (q-values)
|
|
24
|
+
#
|
|
25
|
+
# Quality values express the user's relative preference for a language.
|
|
26
|
+
# Per RFC 2616 Section 3.9, the syntax is:
|
|
27
|
+
#
|
|
28
|
+
# qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] )
|
|
29
|
+
#
|
|
30
|
+
# This means:
|
|
31
|
+
# - Values range from +0.000+ to +1.000+
|
|
32
|
+
# - Maximum of 3 decimal places
|
|
33
|
+
# - +0+ indicates "not acceptable"
|
|
34
|
+
# - +1+ indicates "most preferred" (default when omitted)
|
|
35
|
+
#
|
|
36
|
+
# Examples of valid q-values: +0+, +0.5+, +0.75+, +0.123+, +1+, +1.0+, +1.000+
|
|
37
|
+
#
|
|
38
|
+
# Examples of invalid q-values (silently ignored): +1.5+, +0.1234+, +-0.5+, +.5+
|
|
39
|
+
#
|
|
40
|
+
# == Language Tags
|
|
41
|
+
#
|
|
42
|
+
# Language tags follow the BCP 47 specification (RFC 5646), which supersedes
|
|
43
|
+
# the RFC 1766 reference in RFC 2616 Section 3.10. Valid tags consist of:
|
|
44
|
+
#
|
|
45
|
+
# - A primary subtag of 1-8 alphabetic characters (e.g., +en+, +zh+, +ast+)
|
|
46
|
+
# - Zero or more subtags of 1-8 alphanumeric characters, separated by hyphens
|
|
47
|
+
# - The special wildcard tag +*+ (matches any language)
|
|
48
|
+
#
|
|
49
|
+
# Examples of valid language tags:
|
|
50
|
+
# - +en+ (English)
|
|
51
|
+
# - +en-US+ (English, United States)
|
|
52
|
+
# - +zh-Hant-TW+ (Chinese, Traditional script, Taiwan)
|
|
53
|
+
# - +de-CH-1996+ (German, Switzerland, 1996 orthography)
|
|
54
|
+
# - +sr-Latn+ (Serbian, Latin script)
|
|
55
|
+
# - +*+ (wildcard)
|
|
56
|
+
#
|
|
57
|
+
# == Internal Representation
|
|
58
|
+
#
|
|
59
|
+
# Internally, quality values are stored as integers in the range 0-1000
|
|
60
|
+
# (multiplied by 1000) to avoid floating-point comparison issues. This is
|
|
61
|
+
# an implementation detail and does not affect the public API.
|
|
62
|
+
#
|
|
63
|
+
# == Thread Safety
|
|
64
|
+
#
|
|
65
|
+
# Parser does not modify +languages_range+ after initialization. The hash is
|
|
66
|
+
# not frozen by #initialize, so treat it as read-only when sharing across threads.
|
|
67
|
+
#
|
|
68
|
+
# == Error Handling
|
|
69
|
+
#
|
|
70
|
+
# The parser is lenient by design to handle real-world headers that may
|
|
71
|
+
# not strictly conform to specifications:
|
|
72
|
+
#
|
|
73
|
+
# - Invalid language tags are silently skipped
|
|
74
|
+
# - Invalid quality values cause the entry to be skipped
|
|
75
|
+
# - Empty or +nil+ input results in an empty languages_range
|
|
76
|
+
# - Malformed entries (missing separators, etc.) are skipped
|
|
77
|
+
#
|
|
78
|
+
# However, the parser is strict about input types: only +String+ or +nil+
|
|
79
|
+
# are accepted for the +field+ parameter.
|
|
80
|
+
#
|
|
81
|
+
# @example Basic usage
|
|
82
|
+
# parser = AcceptLanguage::Parser.new("da, en-GB;q=0.8, en;q=0.7")
|
|
83
|
+
# parser.match(:en, :da)
|
|
84
|
+
# # => :da
|
|
85
|
+
#
|
|
86
|
+
# @example Inspecting parsed languages
|
|
87
|
+
# parser = AcceptLanguage::Parser.new("fr-CH;q=0.9, fr;q=0.8, en;q=0.7")
|
|
88
|
+
# parser.languages_range
|
|
89
|
+
# # => {"fr-ch"=>900, "fr"=>800, "en"=>700}
|
|
90
|
+
#
|
|
91
|
+
# @example Handling wildcards
|
|
92
|
+
# parser = AcceptLanguage::Parser.new("de, *;q=0.5")
|
|
93
|
+
# parser.match(:ja, :de)
|
|
94
|
+
# # => :de
|
|
9
95
|
#
|
|
10
|
-
# @example
|
|
11
|
-
# parser = Parser.new("
|
|
12
|
-
# parser.match(:en, :
|
|
96
|
+
# @example Handling exclusions
|
|
97
|
+
# parser = AcceptLanguage::Parser.new("*, en;q=0")
|
|
98
|
+
# parser.match(:en, :fr)
|
|
99
|
+
# # => :fr
|
|
13
100
|
#
|
|
14
|
-
# @see
|
|
101
|
+
# @see AcceptLanguage.parse
|
|
102
|
+
# @see Matcher
|
|
103
|
+
# @see https://tools.ietf.org/html/rfc2616#section-14.4 RFC 2616 Section 14.4
|
|
104
|
+
# @see https://tools.ietf.org/html/rfc2616#section-3.9 RFC 2616 Section 3.9 (qvalue)
|
|
105
|
+
# @see https://tools.ietf.org/html/bcp47 BCP 47
|
|
15
106
|
class Parser
|
|
107
|
+
# Default quality value (1.0) scaled to internal integer representation.
|
|
108
|
+
#
|
|
109
|
+
# When a language tag appears without an explicit quality value, it is
|
|
110
|
+
# assigned this default value, indicating maximum preference.
|
|
111
|
+
#
|
|
16
112
|
# @api private
|
|
17
|
-
|
|
113
|
+
# @return [Integer] 1000 (representing q=1.0)
|
|
114
|
+
DEFAULT_QUALITY = 1_000
|
|
115
|
+
|
|
116
|
+
# The ASCII digit zero character, used in quality value parsing.
|
|
117
|
+
#
|
|
118
|
+
# @api private
|
|
119
|
+
# @return [String] "0"
|
|
120
|
+
DIGIT_ZERO = "0"
|
|
121
|
+
|
|
122
|
+
# The decimal point character, used in quality value parsing.
|
|
123
|
+
#
|
|
124
|
+
# @api private
|
|
125
|
+
# @return [String] "."
|
|
126
|
+
DOT = "."
|
|
127
|
+
|
|
128
|
+
# Error message raised when +field+ argument is not a String or nil.
|
|
129
|
+
#
|
|
130
|
+
# This guards against accidental non-String values being passed to the
|
|
131
|
+
# parser, which would cause unexpected behavior during parsing.
|
|
132
|
+
#
|
|
133
|
+
# @api private
|
|
134
|
+
# @return [String]
|
|
135
|
+
FIELD_TYPE_ERROR = "Field must be a String or nil"
|
|
136
|
+
|
|
137
|
+
# The comma character that separates language-range entries in the
|
|
138
|
+
# Accept-Language header field value.
|
|
139
|
+
#
|
|
18
140
|
# @api private
|
|
141
|
+
# @return [String] ","
|
|
19
142
|
SEPARATOR = ","
|
|
143
|
+
|
|
144
|
+
# The space character, stripped during parsing as whitespace around
|
|
145
|
+
# separators is optional per RFC 2616.
|
|
146
|
+
#
|
|
20
147
|
# @api private
|
|
148
|
+
# @return [String] " "
|
|
21
149
|
SPACE = " "
|
|
150
|
+
|
|
151
|
+
# The suffix that precedes quality values in language-range entries.
|
|
152
|
+
# A language entry with a quality value has the form: +langtag;q=qvalue+
|
|
153
|
+
#
|
|
22
154
|
# @api private
|
|
155
|
+
# @return [String] ";q="
|
|
23
156
|
SUFFIX = ";q="
|
|
24
|
-
|
|
25
|
-
#
|
|
157
|
+
|
|
158
|
+
# Regular expression pattern for validating quality values.
|
|
159
|
+
#
|
|
160
|
+
# Implements RFC 2616 Section 3.9 qvalue syntax (stricter: a dot must be followed by 1-3 digits):
|
|
161
|
+
#
|
|
26
162
|
# qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] )
|
|
163
|
+
#
|
|
164
|
+
# This pattern accepts:
|
|
165
|
+
# - +0+ or +1+ (integer form)
|
|
166
|
+
# - +0.+ followed by 1-3 digits (e.g., +0.5+, +0.75+, +0.123+)
|
|
167
|
+
# - +1.+ followed by 1-3 zeros (e.g., +1.0+, +1.00+, +1.000+)
|
|
168
|
+
#
|
|
169
|
+
# @api private
|
|
170
|
+
# @return [Regexp]
|
|
171
|
+
#
|
|
172
|
+
# @example Valid matches
|
|
173
|
+
# QVALUE_PATTERN.match?("0") # => true
|
|
174
|
+
# QVALUE_PATTERN.match?("0.5") # => true
|
|
175
|
+
# QVALUE_PATTERN.match?("0.123") # => true
|
|
176
|
+
# QVALUE_PATTERN.match?("1") # => true
|
|
177
|
+
# QVALUE_PATTERN.match?("1.0") # => true
|
|
178
|
+
# QVALUE_PATTERN.match?("1.000") # => true
|
|
179
|
+
#
|
|
180
|
+
# @example Invalid (no match)
|
|
181
|
+
# QVALUE_PATTERN.match?("0.1234") # => false (too many decimals)
|
|
182
|
+
# QVALUE_PATTERN.match?("1.5") # => false (> 1)
|
|
183
|
+
# QVALUE_PATTERN.match?("2") # => false (> 1)
|
|
184
|
+
# QVALUE_PATTERN.match?(".5") # => false (missing leading digit)
|
|
185
|
+
# QVALUE_PATTERN.match?("1.001") # => false (1.x must be zeros only)
|
|
27
186
|
QVALUE_PATTERN = /\A(?:0(?:\.[0-9]{1,3})?|1(?:\.0{1,3})?)\z/
|
|
187
|
+
|
|
188
|
+
# Regular expression pattern for validating language tags.
|
|
189
|
+
#
|
|
190
|
+
# Supports BCP 47 (RFC 5646) language tags, which supersede the RFC 1766
|
|
191
|
+
# tags referenced in RFC 2616 Section 3.10.
|
|
192
|
+
#
|
|
193
|
+
# == Pattern Structure
|
|
194
|
+
#
|
|
195
|
+
# The pattern accepts either:
|
|
196
|
+
# - The wildcard character +*+
|
|
197
|
+
# - A primary subtag (1-8 ALPHA) followed by zero or more subtags
|
|
198
|
+
# (each 1-8 ALPHANUM, preceded by a hyphen)
|
|
199
|
+
#
|
|
200
|
+
# == BCP 47 vs RFC 1766
|
|
201
|
+
#
|
|
202
|
+
# RFC 2616 Section 3.10 references RFC 1766, which only allowed alphabetic
|
|
203
|
+
# characters in subtags. However, BCP 47 (the current standard) permits
|
|
204
|
+
# alphanumeric subtags to support:
|
|
205
|
+
#
|
|
206
|
+
# - Year-based variant subtags (e.g., +1996+ in +de-CH-1996+)
|
|
207
|
+
# - Numeric region codes (e.g., +419+ for Latin America)
|
|
208
|
+
# - Extension and private-use subtags, which may contain digits
|
|
209
|
+
#
|
|
210
|
+
# This implementation follows BCP 47 for maximum compatibility with
|
|
211
|
+
# modern language tags.
|
|
212
|
+
#
|
|
28
213
|
# @api private
|
|
29
|
-
#
|
|
214
|
+
# @return [Regexp]
|
|
30
215
|
#
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
#
|
|
34
|
-
#
|
|
216
|
+
# @example Valid language tags
|
|
217
|
+
# LANGTAG_PATTERN.match?("en") # => true
|
|
218
|
+
# LANGTAG_PATTERN.match?("en-US") # => true
|
|
219
|
+
# LANGTAG_PATTERN.match?("zh-Hant-TW") # => true
|
|
220
|
+
# LANGTAG_PATTERN.match?("de-CH-1996") # => true
|
|
221
|
+
# LANGTAG_PATTERN.match?("*") # => true
|
|
35
222
|
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
223
|
+
# @example Invalid language tags
|
|
224
|
+
# LANGTAG_PATTERN.match?("") # => false (empty)
|
|
225
|
+
# LANGTAG_PATTERN.match?("toolongprimary") # => false (> 8 chars)
|
|
226
|
+
# LANGTAG_PATTERN.match?("en_US") # => false (underscore)
|
|
227
|
+
# LANGTAG_PATTERN.match?("123") # => false (numeric primary)
|
|
40
228
|
LANGTAG_PATTERN = /\A(?:\*|[a-zA-Z]{1,8}(?:-[a-zA-Z0-9]{1,8})*)\z/
|
|
41
229
|
|
|
230
|
+
# The parsed language preferences extracted from the Accept-Language header.
|
|
231
|
+
#
|
|
232
|
+
# This hash maps downcased language tags to their quality values (scaled
|
|
233
|
+
# to integers 0-1000). Tags are stored in lowercase for case-insensitive
|
|
234
|
+
# matching.
|
|
235
|
+
#
|
|
42
236
|
# @api private
|
|
43
|
-
# @return [Hash
|
|
237
|
+
# @return [Hash{String => Integer}] language tags mapped to quality values
|
|
238
|
+
#
|
|
239
|
+
# @example
|
|
240
|
+
# parser = Parser.new("en-GB;q=0.8, fr;q=0.9, de")
|
|
241
|
+
# parser.languages_range
|
|
242
|
+
# # => {"en-gb"=>800, "fr"=>900, "de"=>1000}
|
|
44
243
|
attr_reader :languages_range
|
|
45
244
|
|
|
46
|
-
#
|
|
245
|
+
# Creates a new Parser instance by parsing the given Accept-Language
|
|
246
|
+
# header field value.
|
|
47
247
|
#
|
|
48
|
-
#
|
|
248
|
+
# The parser extracts all valid language-range entries from the header,
|
|
249
|
+
# validates their language tags and quality values, and stores them for
|
|
250
|
+
# subsequent matching operations.
|
|
251
|
+
#
|
|
252
|
+
# == Parsing Process
|
|
253
|
+
#
|
|
254
|
+
# 1. Validate that input is a String or nil
|
|
255
|
+
# 2. Convert nil to empty string
|
|
256
|
+
# 3. Normalize to lowercase for case-insensitive matching
|
|
257
|
+
# 4. Remove all spaces (whitespace is insignificant per RFC 2616)
|
|
258
|
+
# 5. Split on commas to get individual entries
|
|
259
|
+
# 6. For each entry:
|
|
260
|
+
# a. Split on +;q=+ to separate tag from quality
|
|
261
|
+
# b. Validate the language tag
|
|
262
|
+
# c. Validate and parse the quality value (default 1.0 if absent)
|
|
263
|
+
# d. Store valid entries in the languages_range hash
|
|
264
|
+
#
|
|
265
|
+
# @param field [String, nil] the Accept-Language header field value.
|
|
266
|
+
# Common sources include +request.env["HTTP_ACCEPT_LANGUAGE"]+ in Rack
|
|
267
|
+
# applications or +request.headers["Accept-Language"]+ in Rails.
|
|
268
|
+
# When +nil+ is passed (header absent), it is treated as an empty string.
|
|
269
|
+
#
|
|
270
|
+
# @raise [TypeError] if +field+ is neither a String nor nil
|
|
271
|
+
#
|
|
272
|
+
# @example Standard header
|
|
273
|
+
# Parser.new("en-US, en;q=0.9, fr;q=0.8")
|
|
274
|
+
#
|
|
275
|
+
# @example With wildcard
|
|
276
|
+
# Parser.new("fr-FR, fr;q=0.9, *;q=0.5")
|
|
277
|
+
#
|
|
278
|
+
# @example With exclusion
|
|
279
|
+
# Parser.new("*, en;q=0")
|
|
280
|
+
#
|
|
281
|
+
# @example Empty or nil input
|
|
282
|
+
# Parser.new("") # languages_range => {}
|
|
283
|
+
# Parser.new(nil) # languages_range => {}
|
|
284
|
+
#
|
|
285
|
+
# @example Malformed input (invalid entries skipped)
|
|
286
|
+
# Parser.new("en, invalid;;q=0.5, fr;q=0.8")
|
|
287
|
+
# # languages_range => {"en"=>1000, "fr"=>800}
|
|
288
|
+
#
|
|
289
|
+
# @see #languages_range
|
|
49
290
|
def initialize(field)
|
|
291
|
+
raise ::TypeError, FIELD_TYPE_ERROR unless field.nil? || field.is_a?(::String)
|
|
292
|
+
|
|
50
293
|
@languages_range = import(field)
|
|
51
294
|
end
|
|
52
295
|
|
|
53
|
-
# Finds the best matching language from available options based on
|
|
54
|
-
#
|
|
296
|
+
# Finds the best matching language from the available options based on
|
|
297
|
+
# the user's preferences expressed in the Accept-Language header.
|
|
298
|
+
#
|
|
299
|
+
# This method delegates to {Matcher} to perform the actual matching,
|
|
300
|
+
# which considers:
|
|
301
|
+
#
|
|
302
|
+
# 1. **Quality values**: Higher q-values indicate stronger preference
|
|
303
|
+
# 2. **Declaration order**: When q-values are equal, earlier declaration wins
|
|
304
|
+
# 3. **Prefix matching**: +en+ matches +en-US+, +en-GB+, etc.
|
|
305
|
+
# 4. **Wildcards**: +*+ matches any language not explicitly listed
|
|
306
|
+
# 5. **Exclusions**: +q=0+ explicitly excludes a language
|
|
55
307
|
#
|
|
56
|
-
#
|
|
57
|
-
# @return [String, Symbol, nil] Best matching language tag or nil if no match found
|
|
308
|
+
# == Matching Algorithm
|
|
58
309
|
#
|
|
59
|
-
#
|
|
60
|
-
#
|
|
61
|
-
#
|
|
62
|
-
#
|
|
310
|
+
# 1. Remove any available languages that are explicitly excluded (+q=0+)
|
|
311
|
+
# 2. Iterate through preferred languages in descending quality order
|
|
312
|
+
# 3. For each preferred language, find the first available language that:
|
|
313
|
+
# - Exactly matches the preferred tag, OR
|
|
314
|
+
# - Has the preferred tag as a prefix (followed by a hyphen)
|
|
315
|
+
# 4. For wildcards, match any available language not already matched
|
|
316
|
+
# 5. Return the first match found, or +nil+ if no match exists
|
|
317
|
+
#
|
|
318
|
+
# == Return Value Preservation
|
|
319
|
+
#
|
|
320
|
+
# The method returns the language tag exactly as provided in the
|
|
321
|
+
# +available_langtags+ argument, preserving the original case. This is
|
|
322
|
+
# important for direct use with +I18n.locale+ and similar APIs.
|
|
323
|
+
#
|
|
324
|
+
# @param available_langtags [Array<Symbol>] the languages your
|
|
325
|
+
# application supports. These are typically your +I18n.available_locales+
|
|
326
|
+
# or a similar list.
|
|
327
|
+
#
|
|
328
|
+
# @return [Symbol, nil] the best matching language tag from the
|
|
329
|
+
# available options, in its original form as passed to this method.
|
|
330
|
+
# Returns +nil+ if no acceptable match is found.
|
|
331
|
+
#
|
|
332
|
+
# @raise [TypeError] if any element in +available_langtags+ is not a Symbol
|
|
333
|
+
#
|
|
334
|
+
# @example Basic matching
|
|
335
|
+
# parser = Parser.new("da, en-GB;q=0.8, en;q=0.7")
|
|
336
|
+
# parser.match(:en, :da)
|
|
337
|
+
# # => :da
|
|
338
|
+
#
|
|
339
|
+
# @example Regional variant matching
|
|
340
|
+
# parser = Parser.new("en-GB, en;q=0.9")
|
|
341
|
+
# parser.match(:en, :"en-GB", :"en-US")
|
|
342
|
+
# # => :"en-GB"
|
|
343
|
+
#
|
|
344
|
+
# @example Prefix matching
|
|
345
|
+
# parser = Parser.new("en")
|
|
346
|
+
# parser.match(:"en-US", :"en-GB")
|
|
347
|
+
# # => :"en-US" (first match wins)
|
|
348
|
+
#
|
|
349
|
+
# @example No match found
|
|
350
|
+
# parser = Parser.new("ja, zh")
|
|
351
|
+
# parser.match(:en, :fr, :de)
|
|
352
|
+
# # => nil
|
|
353
|
+
#
|
|
354
|
+
# @example Wildcard matching
|
|
355
|
+
# parser = Parser.new("en, *;q=0.5")
|
|
356
|
+
# parser.match(:fr)
|
|
357
|
+
# # => :fr (matched by wildcard)
|
|
358
|
+
#
|
|
359
|
+
# @example Exclusion
|
|
360
|
+
# parser = Parser.new("*, en;q=0")
|
|
361
|
+
# parser.match(:en, :fr)
|
|
362
|
+
# # => :fr (en is excluded)
|
|
363
|
+
#
|
|
364
|
+
# @example With I18n
|
|
365
|
+
# parser = Parser.new(request.env["HTTP_ACCEPT_LANGUAGE"])
|
|
366
|
+
# locale = parser.match(*I18n.available_locales) || I18n.default_locale
|
|
367
|
+
# I18n.locale = locale
|
|
368
|
+
#
|
|
369
|
+
# @see Matcher
|
|
370
|
+
# @see https://tools.ietf.org/html/rfc2616#section-14.4 RFC 2616 Section 14.4
|
|
63
371
|
def match(*available_langtags)
|
|
64
372
|
Matcher.new(**languages_range).call(*available_langtags)
|
|
65
373
|
end
|
|
66
374
|
|
|
67
375
|
private
|
|
68
376
|
|
|
377
|
+
# Parses the Accept-Language header field value into a hash of language
|
|
378
|
+
# tags and their quality values.
|
|
379
|
+
#
|
|
380
|
+
# @param field [String, nil] the raw header field value
|
|
381
|
+
# @return [Hash{String => Integer}] downcased language tags mapped to
|
|
382
|
+
# quality values (0-1000)
|
|
69
383
|
def import(field)
|
|
70
|
-
"#{field}".downcase.delete(SPACE).split(SEPARATOR).
|
|
384
|
+
"#{field}".downcase.delete(SPACE).split(SEPARATOR).each_with_object({}) do |lang, hash|
|
|
71
385
|
tag, quality = lang.split(SUFFIX)
|
|
72
|
-
next
|
|
386
|
+
next unless valid_tag?(tag)
|
|
73
387
|
|
|
74
|
-
|
|
75
|
-
next
|
|
388
|
+
quality_value = parse_quality(quality)
|
|
389
|
+
next if quality_value.nil?
|
|
76
390
|
|
|
77
|
-
hash
|
|
391
|
+
hash[tag] = quality_value
|
|
78
392
|
end
|
|
79
393
|
end
|
|
80
394
|
|
|
395
|
+
# Parses and validates a quality value string.
|
|
396
|
+
#
|
|
397
|
+
# @param quality [String, nil] the quality value string (without the ";q=" prefix)
|
|
398
|
+
# @return [Integer, nil] the quality value scaled to 0-1000, or nil if invalid
|
|
399
|
+
def parse_quality(quality)
|
|
400
|
+
return DEFAULT_QUALITY if quality.nil?
|
|
401
|
+
return unless valid_quality?(quality)
|
|
402
|
+
|
|
403
|
+
qvalue_to_integer(quality)
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
# Converts a validated qvalue string to an integer in the range 0-1000.
|
|
407
|
+
#
|
|
408
|
+
# The conversion algorithm:
|
|
409
|
+
# 1. Remove the decimal point (if present)
|
|
410
|
+
# 2. Pad with zeros on the right to 4 characters
|
|
411
|
+
# 3. Convert to integer
|
|
412
|
+
#
|
|
413
|
+
# This effectively multiplies the decimal value by 1000, avoiding
|
|
414
|
+
# floating-point arithmetic entirely.
|
|
415
|
+
#
|
|
416
|
+
# @param quality [String] a validated qvalue string (e.g., "1", "0.8", "0.123")
|
|
417
|
+
# @return [Integer] the quality value scaled to 0-1000
|
|
418
|
+
#
|
|
419
|
+
# @example Conversion examples
|
|
420
|
+
# qvalue_to_integer("1") # => 1000 ("1" -> "1000" -> 1000)
|
|
421
|
+
# qvalue_to_integer("1.0") # => 1000 ("10" -> "1000" -> 1000)
|
|
422
|
+
# qvalue_to_integer("0.8") # => 800 ("08" -> "0800" -> 800)
|
|
423
|
+
# qvalue_to_integer("0.85") # => 850 ("085" -> "0850" -> 850)
|
|
424
|
+
# qvalue_to_integer("0.123") # => 123 ("0123" -> "0123" -> 123)
|
|
425
|
+
# qvalue_to_integer("0") # => 0 ("0" -> "0000" -> 0)
|
|
426
|
+
def qvalue_to_integer(quality)
|
|
427
|
+
quality.delete(DOT).ljust(4, DIGIT_ZERO).to_i
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
# Validates a quality value string against RFC 2616 Section 3.9.
|
|
431
|
+
#
|
|
432
|
+
# @param quality [String] the quality value to validate
|
|
433
|
+
# @return [Boolean] true if the quality value is valid
|
|
81
434
|
def valid_quality?(quality)
|
|
82
435
|
quality.match?(QVALUE_PATTERN)
|
|
83
436
|
end
|
|
84
437
|
|
|
438
|
+
# Validates a language tag against BCP 47.
|
|
439
|
+
#
|
|
440
|
+
# @param tag [String, nil] the language tag to validate
|
|
441
|
+
# @return [Boolean] true if the tag is valid (including wildcard)
|
|
85
442
|
def valid_tag?(tag)
|
|
86
443
|
return false if tag.nil?
|
|
87
444
|
|