ndr_support 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rubocop.yml +27 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +21 -0
- data/README.md +91 -0
- data/Rakefile +12 -0
- data/code_safety.yml +258 -0
- data/gemfiles/Gemfile.rails32 +6 -0
- data/gemfiles/Gemfile.rails32.lock +108 -0
- data/gemfiles/Gemfile.rails41 +6 -0
- data/gemfiles/Gemfile.rails41.lock +111 -0
- data/gemfiles/Gemfile.rails42 +6 -0
- data/gemfiles/Gemfile.rails42.lock +111 -0
- data/lib/ndr_support.rb +21 -0
- data/lib/ndr_support/array.rb +52 -0
- data/lib/ndr_support/concerns/working_days.rb +94 -0
- data/lib/ndr_support/date_and_time_extensions.rb +103 -0
- data/lib/ndr_support/daterange.rb +196 -0
- data/lib/ndr_support/fixnum/calculations.rb +15 -0
- data/lib/ndr_support/fixnum/julian_date_conversions.rb +14 -0
- data/lib/ndr_support/hash.rb +52 -0
- data/lib/ndr_support/integer.rb +12 -0
- data/lib/ndr_support/nil.rb +38 -0
- data/lib/ndr_support/ourdate.rb +97 -0
- data/lib/ndr_support/ourtime.rb +51 -0
- data/lib/ndr_support/regexp_range.rb +65 -0
- data/lib/ndr_support/safe_file.rb +185 -0
- data/lib/ndr_support/safe_path.rb +268 -0
- data/lib/ndr_support/string/cleaning.rb +136 -0
- data/lib/ndr_support/string/conversions.rb +137 -0
- data/lib/ndr_support/tasks.rb +1 -0
- data/lib/ndr_support/time/conversions.rb +13 -0
- data/lib/ndr_support/utf8_encoding.rb +72 -0
- data/lib/ndr_support/utf8_encoding/control_characters.rb +53 -0
- data/lib/ndr_support/utf8_encoding/force_binary.rb +44 -0
- data/lib/ndr_support/utf8_encoding/object_support.rb +31 -0
- data/lib/ndr_support/version.rb +5 -0
- data/lib/ndr_support/yaml/serialization_migration.rb +65 -0
- data/lib/tasks/audit_code.rake +423 -0
- data/ndr_support.gemspec +39 -0
- data/test/array_test.rb +20 -0
- data/test/concerns/working_days_test.rb +122 -0
- data/test/daterange_test.rb +194 -0
- data/test/fixnum/calculations_test.rb +28 -0
- data/test/hash_test.rb +84 -0
- data/test/integer_test.rb +14 -0
- data/test/nil_test.rb +40 -0
- data/test/ourdate_test.rb +27 -0
- data/test/ourtime_test.rb +27 -0
- data/test/regexp_range_test.rb +135 -0
- data/test/resources/filesystem_paths.yml +37 -0
- data/test/safe_file_test.rb +597 -0
- data/test/safe_path_test.rb +168 -0
- data/test/string/cleaning_test.rb +176 -0
- data/test/string/conversions_test.rb +353 -0
- data/test/test_helper.rb +41 -0
- data/test/time/conversions_test.rb +15 -0
- data/test/utf8_encoding/control_characters_test.rb +84 -0
- data/test/utf8_encoding/force_binary_test.rb +64 -0
- data/test/utf8_encoding_test.rb +170 -0
- data/test/yaml/serialization_test.rb +145 -0
- metadata +295 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# Extensions to String for cleaning and normalising field values
# (postcodes, NHS numbers, names, clinical codes, ...).
class String
  # Control characters stripped by #strip_xml_unsafe_characters and detected by
  # #xml_unsafe?: everything in \x00-\x1f except tab (\x09), line feed (\x0a)
  # and carriage return (\x0d).
  INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
  # Roman numerals for 1..5 (including the non-standard 'IIII'), used by clean(:roman5).
  ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }

  # Used for comparing addresses: upcases the string and deletes every
  # character other than A-Z and 0-9.
  def squash
    upcase.delete('^A-Z0-9')
  end

  # Show postcode in various formats.
  # Parameter "option" can be :user, :compact, :db
  #
  # Spaces are removed and the value upcased before it is checked. A non-blank
  # value that does not end with a recognised postcode shape is returned
  # untouched (the receiver itself). Otherwise:
  #   :compact  - the upcased value with no spaces
  #   :db       - a space inserted before the last three characters when the
  #               compact value is 5 or 6 characters long; unchanged otherwise
  #   otherwise - (including :user) a space inserted before the last three
  #               characters when the compact value has 5 or more characters
  def postcodeize(option = :user)
    nspce = delete(' ').upcase
    # Only anchored at the end: a valid postcode suffix is enough to match.
    well_formed = /([A-Z][0-9]|[A-Z][0-9][0-9]|[A-Z][0-9][A-Z]|[A-Z][A-Z][0-9]|[A-Z][A-Z][0-9][0-9]|[A-Z][A-Z][0-9][A-Z])[0-9][A-Z][A-Z]$/ =~ nspce
    return self if !nspce.blank? && !well_formed # Don't change old-style or malformed postcodes

    case option
    when :compact
      nspce
    when :db
      # NOTE(review): both lengths receive a single space here. If :db was meant
      # to be a fixed-width format, 5-character postcodes would need different
      # padding - confirm against the upstream source.
      case nspce.length
      when 5, 6 then nspce.insert(-4, ' ')
      else nspce
      end
    else # anything else, including :user --> friendly format
      nspce.length < 5 ? nspce : nspce.insert(-4, ' ')
    end
  end

  # Returns a cleaned copy of the string; `what` selects the rule set:
  #   :nhsnumber               - digits only, truncated to the first ten
  #   :postcode, :get_postcode - postcodeize(:db)
  #   :lpi                     - upcased, only 0-9 and A-Z kept
  #   :sex                     - '1' / '2' / '0' (male / female / otherwise)
  #   :sex_c                   - 'M' / 'F' / ''
  #   :name                    - upcased; '.' removed; ',' and ';' become spaces;
  #                              whitespace runs collapsed; '`' becomes "'"; stripped
  #   :ethniccategory          - mapped through a fixed lookup, else upcased
  #   :code                    - split on space/comma/semicolon, '.' removed,
  #                              blanks dropped, re-joined with single spaces
  #   :code_icd                - as above but upcased and keeping '.'; a standalone
  #                              'D' or 'A' is appended to the preceding code
  #   :code_opcs               - see #clean_code_opcs
  #   :hospitalnumber          - drops the final character unless it is a digit
  #   :xmlsafe, :make_xml_safe - strip_xml_unsafe_characters
  #   :roman5                  - runs of I/V (any case) replaced via
  #                              ROMAN_ONE_TO_FIVE_MAPPING
  #   :tnmcategory             - leading T/N/M removed; 'X' upcased, else downcased
  #   anything else            - replaces the literal ' ?' with ' '
  #
  # The receiver is never modified.
  def clean(what)
    case what
    when :nhsnumber
      delete('^0-9')[0..9]
    when :postcode, :get_postcode
      postcodeize(:db)
    when :lpi
      upcase.delete('^0-9A-Z')
    when :sex
      # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
      # The alternation means: starts (a line) with M/F, or contains 1/2 anywhere.
      if self =~ /^M|1/i
        '1'
      elsif self =~ /^F|2/i
        '2'
      else
        '0'
      end
    when :sex_c
      if self =~ /^M|1/i
        'M'
      elsif self =~ /^F|2/i
        'F'
      else
        ''
      end
    when :name
      # Applied in insertion order; each pair is passed straight to gsub.
      substitutions = {
        '.' => '',
        /,|;/ => ' ',
        /\s{2,}/ => ' ',
        '`' => '\''
      }
      substitutions.inject(upcase) { |str, scheme| str.gsub(*scheme) }.strip
    when :ethniccategory
      replace_ethniccategory = {
        '0' => '0',
        '1' => 'M',
        '2' => 'N',
        '3' => 'H',
        '4' => 'J',
        '5' => 'K',
        '6' => 'R',
        '7' => '8',
        '&' => 'X',
        ' ' => 'X',
        '99' => 'X'
      }
      replace_ethniccategory[self] || upcase
    when :code
      split(/ |,|;/).reject(&:blank?).map { |code| code.delete('.') }.join(' ')
    when :code_icd
      # regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
      codes = upcase.split(/ |,|;/).reject { |x| x.squash.blank? }
      cleaned_codes = []
      codes.each do |code|
        # A lone 'D' / 'A' is attached to the code before it. When there is no
        # preceding code (the marker comes first) it is kept as its own token
        # rather than calling + on nil.
        if (code == 'D' || code == 'A') && !cleaned_codes.empty?
          cleaned_codes[-1] += code
        else
          cleaned_codes << code
        end
      end
      cleaned_codes.join(' ')
    when :code_opcs
      clean_code_opcs
    when :hospitalnumber
      self[-1..-1] =~ /\d/ ? self : self[0..-2]
    when :xmlsafe, :make_xml_safe
      strip_xml_unsafe_characters
    when :roman5
      # This deromanises roman numerals between 1 and 5
      # NOTE(review): runs that are not in the mapping (e.g. 'VI') yield nil
      # from the block and are therefore replaced with an empty string.
      gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
    when :tnmcategory
      # Non-destructive sub: leaves the receiver alone and works on frozen strings.
      category = sub(/\A[tnm]/i, '')
      category =~ /\Ax\z/i ? category.upcase : category.downcase
    else
      gsub(' ?', ' ')
    end
  end

  # Returns a copy with every INVALID_CONTROL_CHARS character removed.
  def strip_xml_unsafe_characters
    gsub(String::INVALID_CONTROL_CHARS, '')
  end

  # Truthy when the string contains an INVALID_CONTROL_CHARS character:
  # returns the index of the first one, or nil when there is none.
  def xml_unsafe?
    self =~ String::INVALID_CONTROL_CHARS
  end

  private

  # Splits on space/comma/semicolon, squashes each piece, and keeps only the
  # pieces that are exactly four characters long or contain CZ001 / CZ002.
  # The survivors are joined with single spaces.
  def clean_code_opcs
    split(/ |,|;/).map do |code|
      db_code = code.squash
      if 4 == db_code.length || db_code =~ /CZ00[12]/
        db_code
      else
        next
      end
    end.compact.join(' ')
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_support/core_ext/string/conversions'
|
3
|
+
require 'ndr_support/daterange'
|
4
|
+
require 'ndr_support/ourdate'
|
5
|
+
require 'ndr_support/ourtime'
|
6
|
+
|
7
|
+
# Forward-port ParseDate to Ruby 1.9.x and beyond.
|
8
|
+
# We only use this in String#to_date, but keep the logic
|
9
|
+
# encapsulated for testing purposes - the behaviour of
|
10
|
+
# Date._parse has been known to change.
|
11
|
+
unless defined?(::ParseDate)
  # Stand-in for the ParseDate class, defined only when one is not already
  # present. Wraps Date._parse so the parsing logic can be tested on its own.
  class ParseDate
    class << self
      # Parses `str` with Date._parse (passing `comp` straight through) and
      # returns the components as an eight-element array:
      #   [year, mon, mday, hour, min, sec, zone, wday]
      # Components that could not be extracted are nil.
      def parsedate(str, comp = false)
        components = Date._parse(str, comp)
        components.values_at(:year, :mon, :mday, :hour, :min, :sec, :zone, :wday)
      end
    end
  end
end
|
19
|
+
|
20
|
+
# Extensions to String for phonetic matching, name / number formatting, and
# parsing of dates and booleans.
class String
  # Consonants that receive a Soundex digit, and (position for position) the
  # digit each one maps to.
  SOUNDEX_CHARS = 'BPFVCSKGJQXZDTLMNR'
  SOUNDEX_NUMS = '111122222222334556'
  # String#delete specs: "everything except the coded consonants" and
  # "everything except A-Z".
  SOUNDEX_CHARS_EX = '^' + SOUNDEX_CHARS
  SOUNDEX_CHARS_DEL = '^A-Z'

  # desc: http://en.wikipedia.org/wiki/Soundex
  #
  # Returns the first letter followed by the digit codes of the remaining
  # consonants, right-padded with '0' to at least three digits. When `census`
  # is truthy the digits are limited to three; otherwise all are kept.
  # Returns '' when the string contains no A-Z letters (after upcasing).
  def soundex(census = true)
    str = upcase.delete(SOUNDEX_CHARS_DEL).squeeze
    # Explicit guard for the no-letters case, instead of rescuing every error.
    return '' if str.empty?

    limit = census ? 2 : -1
    digits = str[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)
    # Truncate, collapse repeated digits, truncate again, then pad.
    str[0..0] + digits[0..limit].squeeze[0..limit].ljust(3, '0')
  end

  # True when both strings have the same (census) soundex code.
  def sounds_like(other)
    soundex == other.soundex
  end

  # First date of the Daterange built from this string.
  def date1
    Daterange.new(self).date1
  end

  # Second date of the Daterange built from this string.
  def date2
    Daterange.new(self).date2
  end

  # The date held by the Ourdate built from this string.
  def thedate
    Ourdate.new(self).thedate
  end

  # The time held by the Ourtime built from this string.
  def thetime
    Ourtime.new(self).thetime
  end

  # Convert "SMITH JD" into "Smith JD"
  #
  # The last whitespace-separated word is treated as the initials and left
  # as-is; every preceding word is capitalised. Returns '' when the string
  # holds no words at all.
  def surname_and_initials
    words = split
    return '' if words.empty? # nothing to pop; avoids String + nil

    initials = words.pop
    words.map(&:capitalize).join(' ') + ' ' + initials
  end

  # Like titleize but copes with Scottish and Irish names.
  # A leading "Mc" or "O'" is titleized separately from the rest of the name.
  def surnameize
    s = slice(0, 2).upcase
    if s == 'MC' || s == "O'"
      s.titleize + slice(2..-1).titleize
    else
      titleize
    end
  end

  # Show NHS numbers with spaces
  # Ten-character strings are split 3-3-4; anything else is returned as-is.
  def nhs_numberize
    return self unless length == 10
    self[0..2] + ' ' + self[3..5] + ' ' + self[6..9]
  end

  # truncate a string, with a HTML … at the end
  # Strings longer than `n` are cut to n - 1 characters plus the ellipsis.
  def truncate_hellip(n)
    length > n ? slice(0, n - 1) + '…' : self
  end

  # Try to convert the string value into a date.
  # If given a pattern, use it to parse date, otherwise use default setting to parse it
  #
  # Returns '' for an empty string and nil for a blank one. Recognised
  # `pattern` values are 'ddmmyyyy', 'yyyymmdd', 'yyyymmdd_ons', 'yyyy/dd/mm',
  # 'mm/dd/yyyy', or any strptime format containing '%'.
  def to_date(pattern = nil)
    return '' if empty? # TODO: check if this is used... :/
    return nil if blank?

    pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

    if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
      # Workaround for ONS dates (with missing day / month): revert to old
      # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
      # TODO: Move all death parsing to format 'yyyymmdd_ons'
      return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
      pattern = '%Y%m%d'
    end

    if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
      return date1 # Uses Daterange to consistently parse our displayed date format
    end

    if pattern.to_s.include?('%')
      # Use DateTime.strptime if the pattern contains a percent sign
      parsed_date = DateTime.strptime(self, pattern)
      Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
    else
      # Use '.' rather than '/' as a separator for more consistent parsing:
      year, month, day, *_ = ParseDate.parsedate(gsub('/', '.'))

      if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
        month, day = day, month
      elsif 8 == length && self !~ /\d{8}/
        # dd/mm/yy, rather than yyyymmdd
        year, day = day, year
        # Two-digit years up to the current one are taken as this century.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      elsif 9 == length
        # dd/mmm/yy, rare case.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      end

      Ourdate.build_datetime(year, month, day)
    end
  end

  # Try to convert the string value into boolean
  #
  # Accepts true/t/yes/y/1 and false/f/no/n/0 in any letter case.
  # Raises ArgumentError for anything else.
  def to_boolean
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    return true if self =~ /^(true|t|yes|y|1)$/i
    return false if self =~ /^(false|f|no|n|0)$/i
    raise ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
load 'tasks/audit_code.rake'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class Time
  # Returns the receiver unchanged.
  #
  # Background (from the original authors): Ruby 1.9 ships its own
  # Time#to_time in the stdlib, which returns the time in the local timezone.
  # ActiveSupport carries the definition below but only uses it on Ruby 1.8.7.
  # It is re-declared here to keep that behaviour, because local time zones
  # caused problems with the project's Time#to_s format (either
  # '%d.%m.%Y %H:%M' or '%d.%m.%Y').
  def to_time
    self
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding/control_characters'
|
2
|
+
require 'ndr_support/utf8_encoding/force_binary'
|
3
|
+
require 'ndr_support/utf8_encoding/object_support'
|
4
|
+
|
5
|
+
# Provides encoding support to be used for file / rawtext handling.
|
6
|
+
# Provides encoding support to be used for file / rawtext handling.
module UTF8Encoding
  include ControlCharacters
  include ForceBinary
  include ObjectSupport

  # Raised when we cannot ensure a string is valid UTF-8
  class UTF8CoercionError < EncodingError; end

  # Source encodings tried, in this order, when the caller does not name one.
  AUTO_ENCODINGS = %w( UTF-8 UTF-16 Windows-1252 )
  # Escape used for characters that cannot be mapped: '0x' followed by the
  # character's ordinal in hex, zero-padded to at least two digits.
  REPLACEMENT_SCHEME = ->(char) { '0x' + char.ord.to_s(16).rjust(2, '0') }

  # Non-destructive form of #ensure_utf8!: works on a duplicate of `string`.
  # Returns the UTF-8 copy, or raises UTF8CoercionError.
  def ensure_utf8(string, source_encoding = nil)
    ensure_utf8!(string.dup, source_encoding)
  end

  # Transcodes `string` to UTF-8 in place and returns it.
  #
  # `source_encoding` may be a single encoding or a list of them; when it is
  # omitted, AUTO_ENCODINGS is used. Raises UTF8CoercionError if the string is
  # still not validly encoded after every candidate has been tried.
  def ensure_utf8!(string, source_encoding = nil)
    candidates = source_encoding ? Array.wrap(source_encoding) : AUTO_ENCODINGS

    # Stops at the first candidate that leaves the string validly encoded.
    apply_candidates!(string, candidates)
    return string if string.valid_encoding?

    raise UTF8CoercionError, "Attempted to use: #{candidates}"
  end

  # Non-destructive form of #coerce_utf8!: works on a duplicate of `string`.
  def coerce_utf8(string, source_encoding = nil)
    coerce_utf8!(string.dup, source_encoding)
  end

  # Coerces `string` to UTF-8 in place. The normal route (#ensure_utf8!) is
  # tried first; if that raises UTF8CoercionError, the bytes are re-read as
  # BINARY and anything unmappable is escaped with REPLACEMENT_SCHEME.
  def coerce_utf8!(string, source_encoding = nil)
    ensure_utf8!(string, source_encoding)
  rescue UTF8CoercionError
    string.encode!('UTF-8', 'BINARY', fallback: REPLACEMENT_SCHEME)
  end

  private

  # Tries each candidate source encoding in turn, transcoding `string` to UTF-8
  # in place. Returns the first encoding that produced a valid result, or nil
  # if none did; an EncodingError from a candidate just moves on to the next.
  def apply_candidates!(string, candidates)
    candidates.find do |encoding|
      begin
        string.encode!('UTF-8', encoding).valid_encoding?
      rescue EncodingError
        false
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module UTF8Encoding
  # Escapes control characters found in strings, and in strings nested inside
  # hashes and arrays, using UTF8Encoding::REPLACEMENT_SCHEME.
  module ControlCharacters
    # Every character considered for escaping: \x00-\x1f and \x7f.
    CONTROL_CHARACTERS = /[\x00-\x1f]|\x7f/
    # Characters matched above that are nevertheless left alone
    # (tab, line feed, carriage return).
    ALLOWED_CONTROL_CHARACTERS = %W( \x09 \x0a \x0d )

    # Dispatches on the type of `object`: strings are escaped in place, hashes
    # and arrays are walked recursively, anything else is returned untouched.
    def escape_control_chars_in_object!(object)
      case object
      when String then escape_control_chars!(object)
      when Hash   then escape_control_chars_in_hash!(object)
      when Array  then escape_control_chars_in_array!(object)
      else object
      end
    end

    # Non-destructive form of #escape_control_chars!: escapes a duplicate.
    def escape_control_chars(string)
      escape_control_chars!(string.dup)
    end

    # Replaces, in place, each disallowed control character in `string` with
    # its REPLACEMENT_SCHEME escape. Always returns `string` (gsub! itself
    # would return nil when nothing matched).
    def escape_control_chars!(string)
      string.gsub!(CONTROL_CHARACTERS) do |character|
        next character if ALLOWED_CONTROL_CHARACTERS.include?(character)

        UTF8Encoding::REPLACEMENT_SCHEME[character]
      end
      string
    end

    # Escapes control characters in every value of `hash` (keys are not
    # visited). Returns `hash`.
    def escape_control_chars_in_hash!(hash)
      hash.each_value(&method(:escape_control_chars_in_object!))
    end

    # Escapes control characters in every element of `array`. Returns `array`.
    def escape_control_chars_in_array!(array)
      array.each(&method(:escape_control_chars_in_object!))
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'ndr_support/utf8_encoding'
|
2
|
+
|
3
|
+
module UTF8Encoding
  # Allows any supported object to have any high-ascii string
  # content to be force-encoded from UTF-8 to BINARY (/ASCII-8BIT).
  # This ensures that any serialisation to YAML, using Psych,
  # can be stored in other encodings. (Psych by default emits
  # UTF-8 YAML, which might not survive being stored in a Windows-1252
  # database, for example.)
  module ForceBinary
    # Returns `object` with its string content BINARY-tagged where needed.
    # Strings, hashes and arrays produce new objects (hashes and arrays are
    # processed recursively); any other object is returned as-is.
    def binary_encode_any_high_ascii(object)
      case object
      when String then binary_encode_if_any_high_ascii(object)
      when Hash   then binary_encode_any_high_ascii_in_hash(object)
      when Array  then binary_encode_any_high_ascii_in_array(object)
      else object
      end
    end

    private

    # Returns a UTF-8 copy of `string` (via ensure_utf8), re-tagged as BINARY
    # if it cannot be represented as 7bit ASCII, i.e. contains a byte above 127.
    def binary_encode_if_any_high_ascii(string)
      utf8 = ensure_utf8(string)
      utf8.bytes.any? { |byte| byte > 127 } ? utf8.force_encoding('BINARY') : utf8
    end

    # Builds a new Hash with the same keys as `hash` and each value passed
    # through #binary_encode_any_high_ascii.
    def binary_encode_any_high_ascii_in_hash(hash)
      hash.each_with_object({}) do |(key, value), encoded|
        encoded[key] = binary_encode_any_high_ascii(value)
      end
    end

    # Builds a new Array with each element of `array` passed through
    # #binary_encode_any_high_ascii.
    def binary_encode_any_high_ascii_in_array(array)
      array.map(&method(:binary_encode_any_high_ascii))
    end
  end
end
|