ndr_support 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +91 -0
  11. data/Rakefile +12 -0
  12. data/code_safety.yml +258 -0
  13. data/gemfiles/Gemfile.rails32 +6 -0
  14. data/gemfiles/Gemfile.rails32.lock +108 -0
  15. data/gemfiles/Gemfile.rails41 +6 -0
  16. data/gemfiles/Gemfile.rails41.lock +111 -0
  17. data/gemfiles/Gemfile.rails42 +6 -0
  18. data/gemfiles/Gemfile.rails42.lock +111 -0
  19. data/lib/ndr_support.rb +21 -0
  20. data/lib/ndr_support/array.rb +52 -0
  21. data/lib/ndr_support/concerns/working_days.rb +94 -0
  22. data/lib/ndr_support/date_and_time_extensions.rb +103 -0
  23. data/lib/ndr_support/daterange.rb +196 -0
  24. data/lib/ndr_support/fixnum/calculations.rb +15 -0
  25. data/lib/ndr_support/fixnum/julian_date_conversions.rb +14 -0
  26. data/lib/ndr_support/hash.rb +52 -0
  27. data/lib/ndr_support/integer.rb +12 -0
  28. data/lib/ndr_support/nil.rb +38 -0
  29. data/lib/ndr_support/ourdate.rb +97 -0
  30. data/lib/ndr_support/ourtime.rb +51 -0
  31. data/lib/ndr_support/regexp_range.rb +65 -0
  32. data/lib/ndr_support/safe_file.rb +185 -0
  33. data/lib/ndr_support/safe_path.rb +268 -0
  34. data/lib/ndr_support/string/cleaning.rb +136 -0
  35. data/lib/ndr_support/string/conversions.rb +137 -0
  36. data/lib/ndr_support/tasks.rb +1 -0
  37. data/lib/ndr_support/time/conversions.rb +13 -0
  38. data/lib/ndr_support/utf8_encoding.rb +72 -0
  39. data/lib/ndr_support/utf8_encoding/control_characters.rb +53 -0
  40. data/lib/ndr_support/utf8_encoding/force_binary.rb +44 -0
  41. data/lib/ndr_support/utf8_encoding/object_support.rb +31 -0
  42. data/lib/ndr_support/version.rb +5 -0
  43. data/lib/ndr_support/yaml/serialization_migration.rb +65 -0
  44. data/lib/tasks/audit_code.rake +423 -0
  45. data/ndr_support.gemspec +39 -0
  46. data/test/array_test.rb +20 -0
  47. data/test/concerns/working_days_test.rb +122 -0
  48. data/test/daterange_test.rb +194 -0
  49. data/test/fixnum/calculations_test.rb +28 -0
  50. data/test/hash_test.rb +84 -0
  51. data/test/integer_test.rb +14 -0
  52. data/test/nil_test.rb +40 -0
  53. data/test/ourdate_test.rb +27 -0
  54. data/test/ourtime_test.rb +27 -0
  55. data/test/regexp_range_test.rb +135 -0
  56. data/test/resources/filesystem_paths.yml +37 -0
  57. data/test/safe_file_test.rb +597 -0
  58. data/test/safe_path_test.rb +168 -0
  59. data/test/string/cleaning_test.rb +176 -0
  60. data/test/string/conversions_test.rb +353 -0
  61. data/test/test_helper.rb +41 -0
  62. data/test/time/conversions_test.rb +15 -0
  63. data/test/utf8_encoding/control_characters_test.rb +84 -0
  64. data/test/utf8_encoding/force_binary_test.rb +64 -0
  65. data/test/utf8_encoding_test.rb +170 -0
  66. data/test/yaml/serialization_test.rb +145 -0
  67. metadata +295 -0
@@ -0,0 +1,41 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
4
+ require 'minitest/autorun'
5
+ require 'minitest/unit'
6
+ require 'mocha/mini_test'
7
+
8
+ require 'active_record'
9
+ require 'active_support/time'
10
+ require 'ndr_support'
11
+ require 'tmpdir'
12
+
13
+ NdrSupport.apply_era_date_formats!
14
+
15
+ # We do not use Rails' preferred time zone support, as this would
16
+ # require all dates to be stored in UTC in the database.
17
+ # Thus a birth date of 1975-06-01 would be stored as 1975-05-31 23.00.00.
18
+ # Instead, we want to store all times in local time.
19
+ ActiveRecord::Base.default_timezone = :local
20
+ ActiveRecord::Base.time_zone_aware_attributes = false
21
+
22
+ SafePath.configure! File.dirname(__FILE__) + '/resources/filesystem_paths.yml'
23
+
24
+ # Borrowed from ActiveSupport::TestCase
25
+ module Minitest
26
+ class Test
27
+ # Allow declarative test syntax:
28
+ def self.test(name, &block)
29
+ test_name = "test_#{name.gsub(/\s+/, '_')}".to_sym
30
+ defined = method_defined? test_name
31
+ fail "#{test_name} is already defined in #{self}" if defined
32
+ if block_given?
33
+ define_method(test_name, &block)
34
+ else
35
+ define_method(test_name) do
36
+ flunk "No implementation provided for #{name}"
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,15 @@
1
+ require 'test_helper'
2
+
3
+ class Time::ConversionsTest < Minitest::Test
4
+ test 'to_time should return same object' do
5
+ yaml = '2015-08-06 00:00:00 Z'
6
+
7
+ time = yaml.to_time
8
+ time2 = time.to_time
9
+
10
+ assert Time === time, 'time was not a Time'
11
+ assert Time === time2, 'time2 was not a Time'
12
+
13
+ assert_equal time.object_id, time2.object_id
14
+ end
15
+ end
@@ -0,0 +1,84 @@
1
+ require 'test_helper'
2
+
3
+ # Tests UTF8Encoding::ControlCharacters module.
4
+ class ControlCharactersTest < Minitest::Test
5
+ include UTF8Encoding
6
+
7
+ test 'control char identification' do
8
+ (0..255).each do |code|
9
+ expected = code == 127 || (code < 32 && [9, 10, 13].exclude?(code)) ? 4 : 1
10
+ actual = escape_control_chars(code.chr).length
11
+
12
+ assert_equal expected, actual, "unexpected escaping for value: #{code} (#{code.chr})"
13
+ end
14
+ end
15
+
16
+ test 'escape_control_chars with harmless string' do
17
+ string = 'null \x00 characters suck'
18
+ expected = 'null \x00 characters suck'
19
+ actual = escape_control_chars(string)
20
+
21
+ assert_equal expected, actual
22
+ refute actual.object_id == string.object_id, 'should not have modified in place'
23
+ end
24
+
25
+ test 'escape_control_chars! with harmless string' do
26
+ string = 'null \x00 characters suck'
27
+ expected = 'null \x00 characters suck'
28
+ actual = escape_control_chars!(string)
29
+
30
+ assert_equal expected, actual
31
+ assert_equal actual.object_id, string.object_id
32
+ end
33
+
34
+ test 'escape_control_chars with unprintable control characters' do
35
+ string = "null \x00 \x7F characters suck"
36
+ expected = 'null 0x00 0x7f characters suck'
37
+ actual = escape_control_chars(string)
38
+
39
+ assert_equal expected, actual
40
+ refute actual.object_id == string.object_id, 'should not have modified in place'
41
+ end
42
+
43
+ test 'escape_control_chars! with unprintable control characters' do
44
+ string = "null \x00 characters suck"
45
+ expected = 'null 0x00 characters suck'
46
+ actual = escape_control_chars!(string)
47
+
48
+ assert_equal expected, actual
49
+ assert_equal string.object_id, actual.object_id
50
+ end
51
+
52
+ test 'escape_control_chars! with printable control characters' do
53
+ string = "null \x00 characters \r\n really \t suck \x07\x07\x07"
54
+ expected = "null 0x00 characters \r\n really \t suck 0x070x070x07" # ring ring ring
55
+
56
+ assert_equal expected, escape_control_chars!(string)
57
+ end
58
+
59
+ test 'escape_control_chars_in_object! with array' do
60
+ array = %W( hello\tcruel \x00 world!\n \x07 )
61
+ expected = %W( hello\tcruel 0x00 world!\n 0x07 )
62
+ actual = escape_control_chars_in_object!(array)
63
+
64
+ assert_equal expected, actual
65
+ assert_equal array.object_id, actual.object_id
66
+ end
67
+
68
+ test 'escape_control_chars_in_object! with hash' do
69
+ hash = { :a => "hello\tcruel", :b => "\x00", :c => "world!\n", :d => "\x07" }
70
+ expected = { :a => "hello\tcruel", :b => '0x00', :c => "world!\n", :d => '0x07' }
71
+ actual = escape_control_chars_in_object!(hash)
72
+
73
+ assert_equal expected, actual
74
+ assert_equal hash.object_id, actual.object_id
75
+ end
76
+
77
+ test 'escape_control_chars_in_object! with PORO' do
78
+ object = Object.new
79
+ escaped = escape_control_chars_in_object!(object)
80
+
81
+ assert_equal object, escaped
82
+ assert_equal object.object_id, escaped.object_id
83
+ end
84
+ end
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+ require 'test_helper'
3
+
4
+ # Tests UTF8Encoding::ForceBinary module.
5
+ class ForceBinaryTest < Minitest::Test
6
+ include UTF8Encoding
7
+
8
+ test 'binary_encode_any_high_ascii with low-ascii string' do
9
+ input = 'manana manana'
10
+
11
+ assert_equal 'UTF-8', input.encoding.name
12
+ assert input.valid_encoding?
13
+
14
+ output = binary_encode_any_high_ascii(input)
15
+
16
+ refute_equal input.object_id, output.object_id
17
+
18
+ assert_equal input.bytes.to_a, output.bytes.to_a
19
+ assert_equal 'UTF-8', output.encoding.name
20
+ assert output.valid_encoding?
21
+ end
22
+
23
+ test 'binary_encode_any_high_ascii with high-ascii string' do
24
+ input = 'mañana mañana'
25
+
26
+ assert_equal 'UTF-8', input.encoding.name
27
+ assert input.valid_encoding?
28
+
29
+ output = binary_encode_any_high_ascii(input)
30
+
31
+ refute_equal input.object_id, output.object_id
32
+
33
+ assert_equal input.bytes.to_a, output.bytes.to_a
34
+ assert_equal 'ASCII-8BIT', output.encoding.name
35
+ assert output.valid_encoding?
36
+ end
37
+
38
+ test 'binary_encode_any_high_ascii with array' do
39
+ input = %w(mañana manana)
40
+ output = binary_encode_any_high_ascii(input)
41
+
42
+ refute_equal input.object_id, output.object_id
43
+
44
+ assert_equal %w(UTF-8 UTF-8), input.map { |str| str.encoding.name }
45
+ assert_equal %w(ASCII-8BIT UTF-8), output.map { |str| str.encoding.name }
46
+ end
47
+
48
+ test 'binary_encode_any_high_ascii with hash' do
49
+ input = { :with => 'mañana', :without => 'manana' }
50
+ output = binary_encode_any_high_ascii(input)
51
+
52
+ refute_equal input.object_id, output.object_id
53
+
54
+ assert_equal 'ASCII-8BIT', output[:with].encoding.name
55
+ assert_equal 'UTF-8', output[:without].encoding.name
56
+ end
57
+
58
+ test 'binary_encode_any_high_ascii with other object' do
59
+ input = /mañana mañana/
60
+ output = binary_encode_any_high_ascii(input)
61
+
62
+ assert_equal input.object_id, output.object_id, 'should have returned same object'
63
+ end
64
+ end
@@ -0,0 +1,170 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test_helper'
4
+
5
+ class Utf8EncodingTest < Minitest::Test
6
+ extend UTF8Encoding
7
+ include UTF8Encoding
8
+
9
+ test 'ensure_utf8 should return a new string' do
10
+ string1 = 'hello'
11
+ string2 = ensure_utf8(string1)
12
+
13
+ refute string1.object_id == string2.object_id
14
+ end
15
+
16
+ test 'ensure_utf8! should return the same string' do
17
+ string1 = 'hello'
18
+ string2 = ensure_utf8!(string1)
19
+
20
+ assert string1.object_id == string2.object_id
21
+ end
22
+
23
+ test 'ensure_utf8_object! should work with arrays' do
24
+ array = []
25
+ expects(:ensure_utf8_array!).with(array).returns(array)
26
+ assert_equal array, ensure_utf8_object!(array)
27
+ end
28
+
29
+ test 'ensure_utf8_array! should work on elements' do
30
+ element1 = 'hello'
31
+ element2 = :world
32
+ array = [element1, element2]
33
+
34
+ expects(:ensure_utf8_object!).with(element1)
35
+ expects(:ensure_utf8_object!).with(element2)
36
+
37
+ assert_equal array, ensure_utf8_array!(array)
38
+ end
39
+
40
+ test 'ensure_utf8_object! should work with hashes' do
41
+ hash = {}
42
+ expects(:ensure_utf8_hash!).with(hash).returns(hash)
43
+ assert_equal hash, ensure_utf8_object!(hash)
44
+ end
45
+
46
+ test 'ensure_utf8_hash! should work on values' do
47
+ element1 = 'hello'
48
+ element2 = :world
49
+ hash = { element1 => element2 }
50
+
51
+ expects(:ensure_utf8_object!).with(element1).never
52
+ expects(:ensure_utf8_object!).with(element2)
53
+
54
+ assert_equal hash, ensure_utf8_hash!(hash)
55
+ end
56
+
57
+ test 'ensure_utf8_object! should work with strings' do
58
+ string = ''
59
+ expects(:ensure_utf8!).with(string).returns(string)
60
+ assert_equal string, ensure_utf8_object!(string)
61
+ end
62
+
63
+ test 'coerce_utf8 should return a new string' do
64
+ string1 = 'hello'
65
+ string2 = coerce_utf8(string1)
66
+
67
+ refute string1.object_id == string2.object_id
68
+ end
69
+
70
+ test 'coerce_utf8! should return the same string' do
71
+ string1 = 'hello'
72
+ string2 = coerce_utf8!(string1)
73
+
74
+ assert string1.object_id == string2.object_id
75
+ end
76
+
77
+ test 'ensure_utf8 should convert low bytes to UTF-8 if possible' do
78
+ string1 = 'hello'.force_encoding('Windows-1252')
79
+ string2 = ensure_utf8!(string1)
80
+
81
+ assert_equal string1, string2
82
+ assert_equal 'UTF-8', string2.encoding.name
83
+ end
84
+
85
+ test 'ensure_utf8 should convert high bytes to UTF-8 if possible' do
86
+ string1 = "dash \x96 dash".force_encoding('Windows-1252')
87
+ assert_equal 11, string1.bytes.to_a.length
88
+ assert_equal 11, string1.chars.to_a.length
89
+
90
+ assert string1.valid_encoding?
91
+
92
+ string2 = ensure_utf8(string1)
93
+ assert_equal 13, string2.bytes.to_a.length
94
+ assert_equal 11, string2.chars.to_a.length
95
+
96
+ assert_equal 'UTF-8', string2.encoding.name
97
+ assert string2.valid_encoding?
98
+ end
99
+
100
+ test 'ensure_utf8 should prefer a given encoding' do
101
+ string1 = "japan \x8e\xa6 ese"
102
+ assert_equal 12, string1.bytes.to_a.length
103
+ assert_equal 12, string1.chars.to_a.length
104
+
105
+ string2 = ensure_utf8(string1, 'EUC-JP')
106
+ assert_equal 13, string2.bytes.to_a.length
107
+ assert_equal 11, string2.chars.to_a.length
108
+
109
+ # "halfwidth katakana letter wo":
110
+ assert_equal [239, 189, 166], string2.bytes.to_a[6...9]
111
+
112
+ assert_equal 'UTF-8', string2.encoding.name
113
+ assert string2.valid_encoding?
114
+ end
115
+
116
+ test 'ensure_utf8 should handle UTF-16 strings (using bom)' do
117
+ alpha_beta_le = ensure_utf8("\xff\xfe\xb1\x03\xb2\x03") # Little endian
118
+ alpha_beta_be = ensure_utf8("\xfe\xff\x03\xb1\x03\xb2") # Big endian
119
+
120
+ assert_equal Encoding.find('UTF-8'), alpha_beta_le.encoding
121
+ assert_equal Encoding.find('UTF-8'), alpha_beta_be.encoding
122
+
123
+ assert_equal 2, alpha_beta_le.chars.to_a.length
124
+ assert_equal 2, alpha_beta_be.chars.to_a.length
125
+
126
+ assert_equal alpha_beta_be, alpha_beta_le
127
+ end
128
+
129
+ test 'EncodingError defined' do
130
+ assert defined?(EncodingError)
131
+ end
132
+
133
+ test 'UTF8CoercionError defined' do
134
+ assert defined?(UTF8Encoding::UTF8CoercionError)
135
+ end
136
+
137
+ test 'ensure_utf8 should fail if unable to derive encoding' do
138
+ assert_raises(UTF8Encoding::UTF8CoercionError) do
139
+ # Not going to work with UTF-8 or Windows-1252:
140
+ ensure_utf8("rubbish \x90 rubbish")
141
+ end
142
+ end
143
+
144
+ test 'coerce_utf8 should escape unmappable values' do
145
+ expected = 'rubbish 0x90 rubbish'
146
+ actual = coerce_utf8("rubbish \x90 rubbish")
147
+
148
+ assert_equal expected, actual
149
+ assert actual.valid_encoding?
150
+ assert_equal Encoding.find('UTF-8'), actual.encoding
151
+ end
152
+
153
+ test 'coerce_utf8 should use given source encoding' do
154
+ input = "maybe \xc0 rubbish"
155
+ win_expected = 'maybe À rubbish'
156
+ utf_expected = 'maybe 0xc0 rubbish'
157
+
158
+ win_actual = coerce_utf8(input, 'Windows-1252')
159
+
160
+ assert_equal win_expected, win_actual
161
+ assert win_actual.valid_encoding?
162
+ assert_equal Encoding.find('UTF-8'), win_actual.encoding
163
+
164
+ utf_actual = coerce_utf8(input, 'UTF-8')
165
+
166
+ assert_equal utf_expected, utf_actual
167
+ assert utf_actual.valid_encoding?
168
+ assert_equal Encoding.find('UTF-8'), utf_actual.encoding
169
+ end
170
+ end
@@ -0,0 +1,145 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test_helper'
4
+
5
+ class SerializationTest < Minitest::Test
6
+ include NdrSupport::YAML::SerializationMigration
7
+
8
+ test 'should serialize then deserialize an object correctly' do
9
+ hash = { :a => 1 }
10
+ assert_equal hash, load_yaml(dump_yaml(hash))
11
+ end
12
+
13
+ test 'should handle syck-encoded characters' do
14
+ assert_syck_1_8_yaml_loads_correctly
15
+ end
16
+
17
+ test 'should handle binary yaml with control chars' do
18
+ # irb> "\xC2\xA1null \x00 characters \r\n suck!".to_yaml
19
+ yaml = "--- !binary |-\n wqFudWxsIAAgY2hhcmFjdGVycyANCiBzdWNrIQ==\n"
20
+ assert_equal "¡null 0x00 characters \r\n suck!", load_yaml(yaml)
21
+
22
+ # irb> {fulltext: "\xC2\xA1null \x00 characters \r\n suck!"}.to_yaml
23
+ yamled_hash = "---\n:fulltext: !binary |-\n wqFudWxsIAAgY2hhcmFjdGVycyANCiBzdWNrIQ==\n"
24
+ assert_equal({ :fulltext => "¡null 0x00 characters \r\n suck!" }, load_yaml(yamled_hash))
25
+ end
26
+
27
+ # Psych doesn't always base64-encode control characters:
28
+ test 'should handle non-binary yaml with control chars' do
29
+ # irb> Psych.dump("control \x01 char \n whoops!")
30
+ chr_1_yaml = "--- ! \"control \\x01 char \\n whoops!\"\n"
31
+ assert_equal "control 0x01 char \n whoops!", load_yaml(chr_1_yaml)
32
+ end
33
+
34
+ test 'load_yaml should not coerce to UTF-8 by default' do
35
+ assert_yaml_coercion_behaviour
36
+ end
37
+
38
+ test 'dump_yaml should produce encoding-portable YAML' do
39
+ original_object = { :basic => 'manana', :complex => 'mañana' }
40
+ yaml_produced = dump_yaml(original_object)
41
+ reloaded_object = load_yaml(yaml_produced)
42
+
43
+ assert yaml_produced =~ /basic: manana/, 'binary-encoded more than was necessary'
44
+
45
+ refute yaml_produced.bytes.detect { |byte| byte > 127 }, 'yaml has high-ascii'
46
+ assert reloaded_object.inspect.bytes.detect { |byte| byte > 127 }
47
+ assert_equal original_object, reloaded_object
48
+ end
49
+
50
+ test 'encoding-portable YAML should be loadable' do
51
+ original_object = { :basic => 'manana', :complex => 'mañana' }
52
+ yaml_produced = dump_yaml(original_object)
53
+
54
+ reloaded_object = load_yaml(yaml_produced)
55
+ assert_equal original_object, reloaded_object
56
+ end
57
+
58
+ test 'time-like objects should serialise correctly with psych' do
59
+ assert_timey_wimey_stuff
60
+ end
61
+
62
+ private
63
+
64
+ def assert_timey_wimey_stuff
65
+ assert_times
66
+ assert_dates
67
+ assert_datetimes
68
+ assert_datetimes_with_zones
69
+ end
70
+
71
+ def assert_times
72
+ # Dumped by 1.9.3 syck, within era.
73
+ loaded = YAML.load("--- !timestamp 2014-03-01\n")
74
+ assert [Date, Time].include?(loaded.class), '1.9.3 era timestamp class'
75
+ assert_equal 2014, loaded.year, '1.9.3 era timestamp year'
76
+ assert_equal 3, loaded.month, '1.9.3 era timestamp month'
77
+ assert_equal 1, loaded.day, '1.9.3 era timestamp day'
78
+ end
79
+
80
+ def assert_dates
81
+ date = Date.new(2014, 3, 1)
82
+
83
+ # Dumped by 1.8.7 syck, within era.
84
+ loaded = YAML.load("--- 2014-03-01\n")
85
+ assert_equal date, loaded, '1.8.7 era date'
86
+ end
87
+
88
+ def assert_datetimes
89
+ datetime = DateTime.new(2014, 3, 1, 12, 45, 15)
90
+ loaded = YAML.load(datetime.to_yaml)
91
+
92
+ assert [DateTime, Time].include?(loaded.class), 'datetime class'
93
+ assert_equal datetime, loaded.to_datetime
94
+ assert_equal datetime.to_time, loaded.to_time
95
+ end
96
+
97
+ def assert_datetimes_with_zones
98
+ bst_datetime = DateTime.new(2014, 4, 1, 0, 0, 0, '+1')
99
+ bst_loaded = load_yaml(bst_datetime.to_yaml)
100
+
101
+ assert [DateTime, Time].include?(bst_loaded.class), 'bst datetime class'
102
+ assert_equal bst_datetime, bst_loaded.to_datetime
103
+ assert_equal bst_datetime.to_time, bst_loaded.to_time
104
+
105
+ gmt_datetime = DateTime.new(2014, 3, 1, 0, 0, 0, '+0')
106
+ gmt_loaded = load_yaml(gmt_datetime.to_yaml)
107
+
108
+ assert [DateTime, Time].include?(gmt_loaded.class), 'gmt datetime class'
109
+ assert_equal gmt_datetime, gmt_loaded.to_datetime
110
+ assert_equal gmt_datetime.to_time, gmt_loaded.to_time
111
+ end
112
+
113
+ def assert_syck_1_8_yaml_loads_correctly
114
+ yaml = "--- \nname: Dr. Doctor\000\000\000 \ndiagnosis: \"CIN 1 \\xE2\\x80\\x93 CIN 2\"\n"
115
+ hash = load_yaml(yaml)
116
+
117
+ # The null chars should be escaped:
118
+ assert_equal 'Dr. Doctor0x000x000x00', hash['name']
119
+
120
+ # The dash should be 3 bytes, but recognised as one char:
121
+ assert_equal 15, hash['diagnosis'].bytes.to_a.length
122
+
123
+ assert_syck_1_8_handles_encoding(hash)
124
+ end
125
+
126
+ def assert_syck_1_8_handles_encoding(hash)
127
+ assert_equal 13, hash['diagnosis'].chars.to_a.length
128
+
129
+ assert_equal 'UTF-8', hash['diagnosis'].encoding.name
130
+ assert hash['diagnosis'].valid_encoding?
131
+ end
132
+
133
+ def assert_yaml_coercion_behaviour
134
+ # UTF-8, with an unmappable byte too:
135
+ yaml = "---\nfulltextreport: \"Here is \\xE2\\x80\\x93 a weird \\x9D char\"\n"
136
+
137
+ # By default, we'd expect the (serialised) \x9D to raise a coercion error:
138
+ assert_raises(UTF8Encoding::UTF8CoercionError) { load_yaml(yaml) }
139
+
140
+ # With the optional second argument, we can force an escape:
141
+ hash = load_yaml(yaml, true)
142
+ assert_equal 'Here is – a weird 0x9d char', hash['fulltextreport']
143
+ assert_equal 'UTF-8', hash['fulltextreport'].encoding.name
144
+ end
145
+ end