ndr_support 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +91 -0
- data/Rakefile +12 -0
- data/code_safety.yml +258 -0
- data/gemfiles/Gemfile.rails32 +6 -0
- data/gemfiles/Gemfile.rails32.lock +108 -0
- data/gemfiles/Gemfile.rails41 +6 -0
- data/gemfiles/Gemfile.rails41.lock +111 -0
- data/gemfiles/Gemfile.rails42 +6 -0
- data/gemfiles/Gemfile.rails42.lock +111 -0
- data/lib/ndr_support.rb +21 -0
- data/lib/ndr_support/array.rb +52 -0
- data/lib/ndr_support/concerns/working_days.rb +94 -0
- data/lib/ndr_support/date_and_time_extensions.rb +103 -0
- data/lib/ndr_support/daterange.rb +196 -0
- data/lib/ndr_support/fixnum/calculations.rb +15 -0
- data/lib/ndr_support/fixnum/julian_date_conversions.rb +14 -0
- data/lib/ndr_support/hash.rb +52 -0
- data/lib/ndr_support/integer.rb +12 -0
- data/lib/ndr_support/nil.rb +38 -0
- data/lib/ndr_support/ourdate.rb +97 -0
- data/lib/ndr_support/ourtime.rb +51 -0
- data/lib/ndr_support/regexp_range.rb +65 -0
- data/lib/ndr_support/safe_file.rb +185 -0
- data/lib/ndr_support/safe_path.rb +268 -0
- data/lib/ndr_support/string/cleaning.rb +136 -0
- data/lib/ndr_support/string/conversions.rb +137 -0
- data/lib/ndr_support/tasks.rb +1 -0
- data/lib/ndr_support/time/conversions.rb +13 -0
- data/lib/ndr_support/utf8_encoding.rb +72 -0
- data/lib/ndr_support/utf8_encoding/control_characters.rb +53 -0
- data/lib/ndr_support/utf8_encoding/force_binary.rb +44 -0
- data/lib/ndr_support/utf8_encoding/object_support.rb +31 -0
- data/lib/ndr_support/version.rb +5 -0
- data/lib/ndr_support/yaml/serialization_migration.rb +65 -0
- data/lib/tasks/audit_code.rake +423 -0
- data/ndr_support.gemspec +39 -0
- data/test/array_test.rb +20 -0
- data/test/concerns/working_days_test.rb +122 -0
- data/test/daterange_test.rb +194 -0
- data/test/fixnum/calculations_test.rb +28 -0
- data/test/hash_test.rb +84 -0
- data/test/integer_test.rb +14 -0
- data/test/nil_test.rb +40 -0
- data/test/ourdate_test.rb +27 -0
- data/test/ourtime_test.rb +27 -0
- data/test/regexp_range_test.rb +135 -0
- data/test/resources/filesystem_paths.yml +37 -0
- data/test/safe_file_test.rb +597 -0
- data/test/safe_path_test.rb +168 -0
- data/test/string/cleaning_test.rb +176 -0
- data/test/string/conversions_test.rb +353 -0
- data/test/test_helper.rb +41 -0
- data/test/time/conversions_test.rb +15 -0
- data/test/utf8_encoding/control_characters_test.rb +84 -0
- data/test/utf8_encoding/force_binary_test.rb +64 -0
- data/test/utf8_encoding_test.rb +170 -0
- data/test/yaml/serialization_test.rb +145 -0
- metadata +295 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# Data-cleaning extensions to String (postcodes, NHS numbers, clinical codes...).
class String
  # Control characters treated as XML-unsafe: every character below 0x20
  # apart from tab (0x09), line feed (0x0a) and carriage return (0x0d).
  INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
  # Upper-case roman numerals for one to five; 'IIII' and 'IV' both mean four.
  ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }

  # Used for comparing addresses: returns an upper-cased copy that contains
  # only the characters A-Z and 0-9.
  def squash
    upcase.delete('^A-Z0-9')
  end

  # Show postcode in various formats.
  #
  # Parameter "option" can be:
  #   :compact - upper case, with no spaces at all
  #   :db      - a space inserted before the last three characters of
  #              5 and 6 character postcodes; other lengths are left compact
  #   :user    - (the default, and any other value) friendly format: a space
  #              before the last three characters when 5 or more long
  #
  # Returns the receiver itself, untouched, when it is not blank and does not
  # end in something shaped like a postcode.
  def postcodeize(option = :user)
    nspce = delete(' ').upcase
    # NOTE(review): the pattern is only anchored at the end of a line, so any
    # leading junk before a postcode-shaped suffix is accepted - confirm intended.
    unless nspce.blank? || /([A-Z][0-9]|[A-Z][0-9][0-9]|[A-Z][0-9][A-Z]|[A-Z][A-Z][0-9]|[A-Z][A-Z][0-9][0-9]|[A-Z][A-Z][0-9][A-Z])[0-9][A-Z][A-Z]$/ =~ nspce
      return self # Don't change old-style or malformed postcodes
    end

    case option
    when :compact
      nspce
    when :db
      # NOTE(review): both branches below insert the same separator as written.
      # If the stored format is meant to be fixed-width, 5 character postcodes
      # would need a wider separator - verify against the database format.
      case nspce.length
      when 5 then nspce.insert(-4, ' ')
      when 6 then nspce.insert(-4, ' ')
      else nspce
      end
    else # anything else, including :user --> friendly format
      nspce.length < 5 ? nspce : nspce.insert(-4, ' ')
    end
  end

  # Returns a cleaned copy of the string; `what` selects the cleaning rule:
  #
  #   :nhsnumber               - first ten digits, everything else dropped
  #   :postcode, :get_postcode - postcodeize(:db)
  #   :lpi                     - upper case, A-Z and 0-9 only
  #   :sex                     - '1' (male), '2' (female) or '0'
  #   :sex_c                   - 'M', 'F' or ''
  #   :name                    - upper case; full stops removed; commas and
  #                              semicolons become spaces; runs of whitespace
  #                              collapse; backticks become apostrophes
  #   :ethniccategory          - maps legacy codes via a lookup, else upcases
  #   :code                    - space-joined codes with full stops removed
  #   :code_icd                - space-joined upper-case codes, with separated
  #                              D / A suffixes re-attached to the code before
  #   :code_opcs               - space-joined squashed codes that are 4 long
  #                              (or contain CZ001 / CZ002)
  #   :hospitalnumber          - drops the last character unless it is a digit
  #   :xmlsafe, :make_xml_safe - strip_xml_unsafe_characters
  #   :roman5                  - roman numerals I..V become 1..5
  #   :tnmcategory             - drops a leading T/N/M; 'X' alone is upcased,
  #                              anything else is downcased
  #   anything else            - replaces every ' ?' with a single space
  #
  # The receiver is never modified.
  def clean(what)
    case what
    when :nhsnumber
      delete('^0-9')[0..9]
    when :postcode, :get_postcode
      postcodeize(:db)
    when :lpi
      upcase.delete('^0-9A-Z')
    when :sex
      # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
      # NOTE(review): /^M|1/ reads as "M at the start of a line, OR a 1
      # anywhere" (likewise F / 2 below), so 'F1' cleans to '1'.
      if self =~ /^M|1/i
        '1'
      elsif self =~ /^F|2/i
        '2'
      else
        '0'
      end
    when :sex_c
      if self =~ /^M|1/i
        'M'
      elsif self =~ /^F|2/i
        'F'
      else
        ''
      end
    when :name
      # Applied in insertion order, so separators are turned into spaces
      # before runs of whitespace are collapsed.
      substitutions = {
        '.' => '',
        /,|;/ => ' ',
        /\s{2,}/ => ' ',
        '`' => '\''
      }
      substitutions.inject(upcase) { |str, scheme| str.gsub(*scheme) }.strip
    when :ethniccategory
      replace_ethniccategory = {
        '0' => '0',
        '1' => 'M',
        '2' => 'N',
        '3' => 'H',
        '4' => 'J',
        '5' => 'K',
        '6' => 'R',
        '7' => '8',
        '&' => 'X',
        ' ' => 'X',
        '99' => 'X'
      }
      replace_ethniccategory[self] || upcase
    when :code
      split(/ |,|;/).reject(&:blank?).map { |code| code.delete('.') }.join(' ')
    when :code_icd
      clean_code_icd
    when :code_opcs
      clean_code_opcs
    when :hospitalnumber
      self[-1..-1] =~ /\d/ ? self : self[0..-2]
    when :xmlsafe, :make_xml_safe
      strip_xml_unsafe_characters
    when :roman5
      # This deromanises roman numerals between 1 and 5. Runs of I/V that are
      # not in the mapping (e.g. 'VI') are kept as they are, rather than being
      # silently deleted from the result.
      gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING.fetch(match.upcase, match) }
    when :tnmcategory
      clean_tnmcategory
    else
      gsub(' ?', ' ')
    end
  end

  # Returns a copy with every INVALID_CONTROL_CHARS character removed.
  def strip_xml_unsafe_characters
    gsub(String::INVALID_CONTROL_CHARS, '')
  end

  # Truthy (the index of the first offending character) when the string
  # contains an INVALID_CONTROL_CHARS character; nil otherwise.
  def xml_unsafe?
    self =~ String::INVALID_CONTROL_CHARS
  end

  private

  # Upcases and splits on space / comma / semicolon, drops tokens with no
  # letters or digits, and glues a free-standing 'D' or 'A' token onto the
  # code before it. A leading 'D' / 'A' has nothing to attach to, so it is
  # kept as a token of its own (previously this raised NoMethodError).
  def clean_code_icd
    codes = upcase.split(/ |,|;/).reject { |code| code.squash.blank? }
    codes.each_with_object([]) do |code, cleaned_codes|
      if %w(D A).include?(code) && !cleaned_codes.empty?
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end.join(' ')
  end

  # Splits on space / comma / semicolon, squashes each token, and keeps only
  # those that are exactly 4 characters long or contain CZ001 / CZ002.
  def clean_code_opcs
    db_codes = split(/ |,|;/).map(&:squash)
    db_codes.select { |db_code| 4 == db_code.length || db_code =~ /CZ00[12]/ }.join(' ')
  end

  # Works on a copy (non-bang #sub), so the receiver is left untouched and
  # frozen strings can be cleaned too.
  def clean_tnmcategory
    category = sub(/\A[tnm]/i, '')
    category =~ /\Ax\z/i ? category.upcase : category.downcase
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_support/core_ext/string/conversions'
|
3
|
+
require 'ndr_support/daterange'
|
4
|
+
require 'ndr_support/ourdate'
|
5
|
+
require 'ndr_support/ourtime'
|
6
|
+
|
7
|
+
# Forward-port ParseDate to Ruby 1.9.x and beyond.
|
8
|
+
# We only use this in String#to_date, but keep the logic
|
9
|
+
# encapsulated for testing purposes - the behaviour of
|
10
|
+
# Date._parse has been known to change.
|
11
|
+
unless defined?(::ParseDate)
  # Stand-in for the ParseDate library, defined only when no ::ParseDate
  # constant already exists.
  class ParseDate
    class << self
      # Parses `str` with Date._parse (passing `comp` straight through) and
      # returns an eight element array:
      #   [year, mon, mday, hour, min, sec, zone, wday]
      # with nil in the place of anything Date._parse did not report.
      def parsedate(str, comp = false)
        parts = Date._parse(str, comp)
        [:year, :mon, :mday, :hour, :min, :sec, :zone, :wday].map { |field| parts[field] }
      end
    end
  end
end
|
19
|
+
|
20
|
+
# Conversion helpers on String (soundex, names, NHS numbers, dates, booleans).
class String
  # Consonants that carry a soundex code and, position for position, their codes.
  SOUNDEX_CHARS = 'BPFVCSKGJQXZDTLMNR'
  SOUNDEX_NUMS  = '111122222222334556'
  # String#delete selectors: "anything that is not a coded consonant" and
  # "anything that is not an upper-case letter".
  SOUNDEX_CHARS_EX = '^' + SOUNDEX_CHARS
  SOUNDEX_CHARS_DEL = '^A-Z'

  # desc: http://en.wikipedia.org/wiki/Soundex
  #
  # Returns the first letter followed by the codes of the remaining coded
  # consonants, right-padded with '0' to at least three digits. With
  # `census` true (the default) the codes are cut to three digits; otherwise
  # all of them are kept. Returns '' when the string contains no letters.
  def soundex(census = true)
    str = upcase.delete(SOUNDEX_CHARS_DEL).squeeze
    # Nothing to encode. (This is the only case the old inline `rescue ''`
    # could ever catch: str[1..-1] is nil for an empty string.)
    return '' if str.empty?

    last = census ? 2 : -1
    codes = str[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)
    # Cut, collapse repeated codes, then cut again - same order as ever, so
    # previously computed values still compare equal.
    str[0..0] + codes[0..last].squeeze[0..last].ljust(3, '0')
  end

  # True when both strings share the same (census) soundex value.
  def sounds_like(other)
    soundex == other.soundex
  end

  # First date of the string, as parsed by Daterange.
  def date1
    Daterange.new(self).date1
  end

  # Second date of the string, as parsed by Daterange.
  def date2
    Daterange.new(self).date2
  end

  # The string parsed by Ourdate.
  def thedate
    Ourdate.new(self).thedate
  end

  # The string parsed by Ourtime.
  def thetime
    Ourtime.new(self).thetime
  end

  # Convert "SMITH JD" into "Smith JD"
  #
  # The last whitespace-separated word is taken to be the initials and left
  # alone; every word before it is capitalized. Returns the receiver as-is
  # when it contains no words at all (previously a TypeError).
  def surname_and_initials
    words = split
    return self if words.empty?

    initials = words.pop
    words.collect(&:capitalize).join(' ') + ' ' + initials
  end

  # Like titleize but copes with Scottish and Irish names.
  # (A leading "Mc" or "O'" is titleized separately from the rest.)
  def surnameize
    s = slice(0, 2).upcase
    if s == 'MC' || s == "O'"
      s.titleize + slice(2..-1).titleize
    else
      titleize
    end
  end

  # Show NHS numbers with spaces (3-3-4 grouping). Strings that are not
  # exactly ten characters long are returned unchanged.
  def nhs_numberize
    return self unless length == 10
    self[0..2] + ' ' + self[3..5] + ' ' + self[6..9]
  end

  # Truncate a string longer than `n`, keeping the first n - 1 characters
  # and appending the ellipsis marker. Shorter strings are returned as-is.
  def truncate_hellip(n)
    length > n ? slice(0, n - 1) + '…' : self
  end

  # Try to convert the string value into a date.
  # If given a pattern, use it to parse date, otherwise use default setting to parse it
  #
  # Recognised patterns, as far as this method is concerned:
  #   'ddmmyyyy'                   - shorthand for '%d%m%Y'
  #   'yyyymmdd', 'yyyymmdd_ons'   - '%Y%m%d', but nil when the month or the
  #                                  day is '00'
  #   anything containing '%'      - handed to DateTime.strptime
  #   'yyyy/dd/mm', 'mm/dd/yyyy'   - free parse, with day and month swapped
  #   nil / anything else          - free parse via ParseDate
  #
  # Returns '' for an empty string, nil for a blank one, otherwise whatever
  # Daterange#date1 / Ourdate.build_datetime return.
  def to_date(pattern = nil)
    return '' if empty? # TODO: check if this is used... :/
    return nil if blank?

    pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

    if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
      # Workaround for ONS dates (with missing day / month): revert to old
      # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
      # TODO: Move all death parsing to format 'yyyymmdd_ons'
      return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
      pattern = '%Y%m%d'
    end

    if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
      return date1 # Uses Daterange to consistently parse our displayed date format
    end

    if pattern.to_s.include?('%')
      # Use Date.strptime if the pattern contains a percent sign
      parsed_date = DateTime.strptime(self, pattern)
      Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
    else
      # Use '.' rather than '/' as a separator for more consistent parsing:
      year, month, day, *_ = ParseDate.parsedate(gsub('/', '.'))

      if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
        month, day = day, month
      elsif 8 == length && self !~ /\d{8}/
        # dd/mm/yy, rather than yyyymmdd
        year, day = day, year
        # Two-digit years up to the current one belong to this century:
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      elsif 9 == length
        # dd/mmm/yy, rare case.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      end

      Ourdate.build_datetime(year, month, day)
    end
  end

  # Try to convert the string value into boolean
  #
  # true for true / t / yes / y / 1, false for false / f / no / n / 0 (any
  # case); raises ArgumentError for everything else.
  def to_boolean
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    # (^ and $ anchor to lines, so one matching line is enough).
    # A String can never equal true / false or be nil, so only the
    # patterns are checked.
    return true if self =~ /^(true|t|yes|y|1)$/i
    return false if self =~ /^(false|f|no|n|0)$/i
    fail ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
load 'tasks/audit_code.rake'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class Time
  # Returns the receiver itself, with no conversion of any kind.
  #
  # Background: from Ruby 1.9 onwards the stdlib (not core) Time supplies
  # its own #to_time, which hands back the time in the local timezone.
  # ActiveSupport ships the definition below as well, but only puts it to
  # use on Ruby 1.8.7. We keep the 1.8.7 behaviour on purpose: local time
  # zones have caused problems with our Time#to_s format (which formats
  # as either '%d.%m.%Y %H:%M' or '%d.%m.%Y').
  def to_time
    self
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding/control_characters'
|
2
|
+
require 'ndr_support/utf8_encoding/force_binary'
|
3
|
+
require 'ndr_support/utf8_encoding/object_support'
|
4
|
+
|
5
|
+
# Provides encoding support to be used for file / rawtext handling.
|
6
|
+
module UTF8Encoding
  include ControlCharacters
  include ForceBinary
  include ObjectSupport

  # Raised when we cannot ensure a string is valid UTF-8
  class UTF8CoercionError < EncodingError; end

  # Our known source encodings, in order of preference:
  AUTO_ENCODINGS = %w( UTF-8 UTF-16 Windows-1252 )
  # How should unmappable characters be escaped, when forcing encoding?
  # Each one becomes '0x' followed by its ordinal in hex, zero-padded to at
  # least two digits (e.g. "\x00" => '0x00').
  REPLACEMENT_SCHEME = lambda { |char| '0x' + char.ord.to_s(16).rjust(2, '0') }

  # Returns a new string with valid UTF-8 encoding,
  # or raises an exception if encoding fails.
  #
  # `source_encoding` may be a single encoding or a list of them; when it is
  # omitted, AUTO_ENCODINGS are tried in order. `string` is not modified.
  def ensure_utf8(string, source_encoding = nil)
    ensure_utf8!(string.dup, source_encoding)
  end

  # Attempts to encode `string` to UTF-8, in place.
  # Returns `string`, or raises an exception.
  #
  # Raises UTF8CoercionError when none of the candidate source encodings
  # leaves `string` validly encoded.
  def ensure_utf8!(string, source_encoding = nil)
    # A list of encodings we should try from. Kernel#Array turns a single
    # encoding into a one-element list and passes a list through, without
    # needing ActiveSupport's Array.wrap (which this file never requires).
    candidates = source_encoding ? Array(source_encoding) : AUTO_ENCODINGS

    # Attempt to coerce the string to UTF-8, from one of the source
    # candidates (in order of preference):
    apply_candidates!(string, candidates)

    unless string.valid_encoding?
      # None of our candidate source encodings worked, so fail:
      fail(UTF8CoercionError, "Attempted to use: #{candidates}")
    end

    string
  end

  # Returns a UTF-8 version of `string`, escaping any unmappable characters.
  # `string` itself is not modified.
  def coerce_utf8(string, source_encoding = nil)
    coerce_utf8!(string.dup, source_encoding)
  end

  # Coerces `string` to UTF-8, in place, escaping any unmappable characters.
  def coerce_utf8!(string, source_encoding = nil)
    # Try normally first...
    ensure_utf8!(string, source_encoding)
  rescue UTF8CoercionError
    # ...before going back-to-basics, and replacing things that don't map:
    string.encode!('UTF-8', 'BINARY', :fallback => REPLACEMENT_SCHEME)
  end

  private

  # Walks `candidates` in order, transcoding `string` in place from each one
  # to UTF-8, and stops at the first that leaves it validly encoded.
  # Returns that encoding, or nil when none of them worked.
  def apply_candidates!(string, candidates)
    candidates.detect do |encoding|
      begin
        # Attempt to encode as UTF-8 from source `encoding`:
        string.encode!('UTF-8', encoding)
        # If that worked, we're done; otherwise, move on.
        string.valid_encoding?
      rescue EncodingError
        # If that failed really badly, move on:
        false
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module UTF8Encoding
  # Mixin that escapes control characters inside strings, and inside the
  # strings held by (arbitrarily nested) hashes and arrays, using
  # UTF8Encoding::REPLACEMENT_SCHEME.
  module ControlCharacters
    # Every character this module looks at: 0x00-0x1f, plus 0x7f.
    CONTROL_CHARACTERS = /[\x00-\x1f]|\x7f/
    # The ones that are let through untouched: tab, line feed, carriage return.
    ALLOWED_CONTROL_CHARACTERS = %W( \x09 \x0a \x0d )

    # Escapes, in place, the control characters found anywhere within
    # `object`: strings directly, hashes through their values, arrays through
    # their elements. Objects of any other kind are ignored.
    # Always returns `object`.
    def escape_control_chars_in_object!(object)
      case object
      when String then escape_control_chars!(object)
      when Hash   then escape_control_chars_in_hash!(object)
      when Array  then escape_control_chars_in_array!(object)
      else object
      end
    end

    # Non-destructive variant: works on (and returns) a duplicate of `string`.
    def escape_control_chars(string)
      duplicate = string.dup
      escape_control_chars!(duplicate)
    end

    # Rewrites `string` in place, swapping each disallowed control character
    # for its escaped form, and returns `string`.
    def escape_control_chars!(string)
      string.gsub!(CONTROL_CHARACTERS) do |found|
        # Allowed characters are put straight back.
        next found if ALLOWED_CONTROL_CHARACTERS.include?(found)

        UTF8Encoding::REPLACEMENT_SCHEME.call(found)
      end
      # gsub! gives nil when nothing matched, so hand back the string itself.
      string
    end

    # Escapes control characters in every value (keys are left alone) of
    # `hash`; returns `hash`.
    def escape_control_chars_in_hash!(hash)
      hash.each do |_key, value|
        escape_control_chars_in_object!(value)
      end
    end

    # Escapes control characters in every element of `array`; returns `array`.
    def escape_control_chars_in_array!(array)
      array.each do |item|
        escape_control_chars_in_object!(item)
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module UTF8Encoding
  # Mixin that re-tags any string containing high-ascii (non 7bit) content
  # from UTF-8 to BINARY (/ASCII-8BIT), wherever it sits inside a supported
  # object. The point is that YAML produced by Psych - which emits UTF-8 by
  # default - can then be stored in other encodings (a Windows-1252 database,
  # for example) and survive.
  #
  # Relies on an `ensure_utf8` method being available to the including object.
  module ForceBinary
    # Returns a version of `object` in which every string (directly, as a hash
    # value, or as an array element, to any depth) has been passed through the
    # check above. Hashes and arrays come back as new collections; objects of
    # any other kind are returned as they are.
    def binary_encode_any_high_ascii(object)
      case object
      when String then binary_encode_if_any_high_ascii(object)
      when Hash   then binary_encode_any_high_ascii_in_hash(object)
      when Array  then binary_encode_any_high_ascii_in_array(object)
      else object
      end
    end

    private

    # Returns a BINARY-encoded version of `string`, if it cannot be
    # represented as 7bit ASCII; otherwise the UTF-8 version from `ensure_utf8`.
    def binary_encode_if_any_high_ascii(string)
      utf8 = ensure_utf8(string)
      needs_binary = utf8.bytes.any? { |byte| byte > 127 }
      utf8.force_encoding('BINARY') if needs_binary
      utf8
    end

    # Builds a new hash with the same keys as `hash`, whose values have been
    # BINARY-encoded where necessary.
    def binary_encode_any_high_ascii_in_hash(hash)
      hash.each_with_object({}) do |(key, value), encoded|
        encoded[key] = binary_encode_any_high_ascii(value)
      end
    end

    # Builds a new array from `array`, whose elements have been
    # BINARY-encoded where necessary.
    def binary_encode_any_high_ascii_in_array(array)
      array.collect do |item|
        binary_encode_any_high_ascii(item)
      end
    end
  end
end
|