ndr_support 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +91 -0
  11. data/Rakefile +12 -0
  12. data/code_safety.yml +258 -0
  13. data/gemfiles/Gemfile.rails32 +6 -0
  14. data/gemfiles/Gemfile.rails32.lock +108 -0
  15. data/gemfiles/Gemfile.rails41 +6 -0
  16. data/gemfiles/Gemfile.rails41.lock +111 -0
  17. data/gemfiles/Gemfile.rails42 +6 -0
  18. data/gemfiles/Gemfile.rails42.lock +111 -0
  19. data/lib/ndr_support.rb +21 -0
  20. data/lib/ndr_support/array.rb +52 -0
  21. data/lib/ndr_support/concerns/working_days.rb +94 -0
  22. data/lib/ndr_support/date_and_time_extensions.rb +103 -0
  23. data/lib/ndr_support/daterange.rb +196 -0
  24. data/lib/ndr_support/fixnum/calculations.rb +15 -0
  25. data/lib/ndr_support/fixnum/julian_date_conversions.rb +14 -0
  26. data/lib/ndr_support/hash.rb +52 -0
  27. data/lib/ndr_support/integer.rb +12 -0
  28. data/lib/ndr_support/nil.rb +38 -0
  29. data/lib/ndr_support/ourdate.rb +97 -0
  30. data/lib/ndr_support/ourtime.rb +51 -0
  31. data/lib/ndr_support/regexp_range.rb +65 -0
  32. data/lib/ndr_support/safe_file.rb +185 -0
  33. data/lib/ndr_support/safe_path.rb +268 -0
  34. data/lib/ndr_support/string/cleaning.rb +136 -0
  35. data/lib/ndr_support/string/conversions.rb +137 -0
  36. data/lib/ndr_support/tasks.rb +1 -0
  37. data/lib/ndr_support/time/conversions.rb +13 -0
  38. data/lib/ndr_support/utf8_encoding.rb +72 -0
  39. data/lib/ndr_support/utf8_encoding/control_characters.rb +53 -0
  40. data/lib/ndr_support/utf8_encoding/force_binary.rb +44 -0
  41. data/lib/ndr_support/utf8_encoding/object_support.rb +31 -0
  42. data/lib/ndr_support/version.rb +5 -0
  43. data/lib/ndr_support/yaml/serialization_migration.rb +65 -0
  44. data/lib/tasks/audit_code.rake +423 -0
  45. data/ndr_support.gemspec +39 -0
  46. data/test/array_test.rb +20 -0
  47. data/test/concerns/working_days_test.rb +122 -0
  48. data/test/daterange_test.rb +194 -0
  49. data/test/fixnum/calculations_test.rb +28 -0
  50. data/test/hash_test.rb +84 -0
  51. data/test/integer_test.rb +14 -0
  52. data/test/nil_test.rb +40 -0
  53. data/test/ourdate_test.rb +27 -0
  54. data/test/ourtime_test.rb +27 -0
  55. data/test/regexp_range_test.rb +135 -0
  56. data/test/resources/filesystem_paths.yml +37 -0
  57. data/test/safe_file_test.rb +597 -0
  58. data/test/safe_path_test.rb +168 -0
  59. data/test/string/cleaning_test.rb +176 -0
  60. data/test/string/conversions_test.rb +353 -0
  61. data/test/test_helper.rb +41 -0
  62. data/test/time/conversions_test.rb +15 -0
  63. data/test/utf8_encoding/control_characters_test.rb +84 -0
  64. data/test/utf8_encoding/force_binary_test.rb +64 -0
  65. data/test/utf8_encoding_test.rb +170 -0
  66. data/test/yaml/serialization_test.rb +145 -0
  67. metadata +295 -0
@@ -0,0 +1,136 @@
1
# String extensions for normalising ("cleaning") identifiers, names and
# clinical codes before they are compared or stored.
class String
  # Control characters removed by #strip_xml_unsafe_characters: every code
  # below \x20 except tab (\x09), line feed (\x0a) and carriage return (\x0d).
  INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
  # Roman numerals (including the non-standard 'IIII') mapped to the digits 1-5.
  ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }

  # Used for comparing addresses: upper-cases the string and removes every
  # character other than A-Z and 0-9.
  def squash
    upcase.delete('^A-Z0-9')
  end

  # Show postcode in various formats.
  # Parameter "option" can be :user, :compact, :db
  #
  # Strings that do not end in a recognised postcode pattern (once spaces are
  # removed and the text is upper-cased) are returned unchanged. Blank strings
  # are treated like recognised postcodes and are formatted as requested.
  def postcodeize(option = :user)
    nspce = delete(' ').upcase
    unless nspce.blank? || /([A-Z][0-9]|[A-Z][0-9][0-9]|[A-Z][0-9][A-Z]|[A-Z][A-Z][0-9]|[A-Z][A-Z][0-9][0-9]|[A-Z][A-Z][0-9][A-Z])[0-9][A-Z][A-Z]$/ =~ nspce
      return self # Don't change old-style or malformed postcodes
    end
    case option
    when :compact
      nspce
    when :db
      # NOTE(review): both branches insert the same single space before the
      # final three characters; confirm whether 5-character postcodes were
      # meant to be padded differently.
      case nspce.length
      when 5 then nspce.insert(-4, ' ')
      when 6 then nspce.insert(-4, ' ')
      else nspce
      end
    else # anything else, including :user --> friendly format
      nspce.length < 5 ? nspce : nspce.insert(-4, ' ')
    end
  end

  # Returns a cleaned copy of the string; the receiver is never modified.
  #
  # `what` selects the cleaning rule:
  #   :nhsnumber              - digits only, truncated to the first ten
  #   :postcode/:get_postcode - postcodeize(:db)
  #   :lpi                    - upper-cased, A-Z and 0-9 only
  #   :sex                    - '1', '2' or '0'
  #   :sex_c                  - 'M', 'F' or ''
  #   :name                   - upper-cased; '.' removed; ',' and ';' become
  #                             spaces; whitespace runs collapsed; '`' => "'"
  #   :ethniccategory         - table lookup, falling back to upcase
  #   :code                   - tokens split on space/comma/semicolon, with
  #                             '.' removed, re-joined with single spaces
  #   :code_icd               - as :code, but upper-cased, keeping '.' and
  #                             attaching 'D'/'A' markers to the previous code
  #   :code_opcs              - squashed tokens of length 4, or containing
  #                             CZ001 / CZ002
  #   :hospitalnumber         - drops the final character unless it is a digit
  #   :xmlsafe/:make_xml_safe - strip_xml_unsafe_characters
  #   :roman5                 - replaces runs of I/V with digits 1-5
  #   :tnmcategory            - drops a leading T/N/M; 'x' is upper-cased,
  #                             anything else is lower-cased
  # Any other value replaces each literal ' ?' with a single space.
  def clean(what)
    case what
    when :nhsnumber
      delete('^0-9')[0..9]
    when :postcode, :get_postcode
      postcodeize(:db)
    when :lpi
      upcase.delete('^0-9A-Z')
    when :sex
      # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
      # The patterns read as "(line starts with M) or (contains 1)", and
      # likewise for F / 2.
      if self =~ /^M|1/i
        '1'
      elsif self =~ /^F|2/i
        '2'
      else
        '0'
      end
    when :sex_c
      if self =~ /^M|1/i
        'M'
      elsif self =~ /^F|2/i
        'F'
      else
        ''
      end
    when :name
      # Applied in insertion order, so separators are turned into spaces
      # before runs of whitespace are collapsed.
      substitutions = {
        '.'      => '',
        /,|;/    => ' ',
        /\s{2,}/ => ' ',
        '`'      => '\''
      }
      substitutions.inject(upcase) { |str, scheme| str.gsub(*scheme) }.strip
    when :ethniccategory
      replace_ethniccategory = {
        '0' => '0',
        '1' => 'M',
        '2' => 'N',
        '3' => 'H',
        '4' => 'J',
        '5' => 'K',
        '6' => 'R',
        '7' => '8',
        '&' => 'X',
        ' ' => 'X',
        '99' => 'X'
      }
      replace_ethniccategory[self] || upcase
    when :code
      split(/ |,|;/).map do |code|
        code.blank? ? next : code.gsub('.', '')
      end.compact.join(' ')
    when :code_icd
      clean_code_icd
    when :code_opcs
      clean_code_opcs
    when :hospitalnumber
      self[-1..-1] =~ /\d/ ? self : self[0..-2]
    when :xmlsafe, :make_xml_safe
      strip_xml_unsafe_characters
    when :roman5
      # This deromanises roman numerals between 1 and 5
      # A run of I/V characters with no entry in the mapping (e.g. 'VI')
      # yields nil from the block, so that run is removed.
      gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
    when :tnmcategory
      clean_tnmcategory
    else
      gsub(' ?', ' ')
    end
  end

  # Returns a copy with every INVALID_CONTROL_CHARS character removed.
  def strip_xml_unsafe_characters
    gsub(String::INVALID_CONTROL_CHARS, '')
  end

  # Returns the index of the first INVALID_CONTROL_CHARS character (truthy),
  # or nil when the string contains none.
  def xml_unsafe?
    self =~ String::INVALID_CONTROL_CHARS
  end

  private

  # Splits on space/comma/semicolon, upper-cases, and attaches a stand-alone
  # 'D' or 'A' token to the code that precedes it. A marker with no preceding
  # code (e.g. the input starts with 'D') is kept as its own token instead of
  # raising.
  def clean_code_icd
    # regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
    codes = upcase.split(/ |,|;/).reject { |code| code.squash.empty? }
    cleaned_codes = []
    codes.each do |code|
      if (code == 'D' || code == 'A') && !cleaned_codes.empty?
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end
    cleaned_codes.join(' ')
  end

  # Keeps only tokens that squash to exactly four characters, or that contain
  # CZ001 / CZ002; the kept tokens are returned squashed and space-separated.
  def clean_code_opcs
    split(/ |,|;/).map do |code|
      db_code = code.squash
      if 4 == db_code.length || db_code =~ /CZ00[12]/
        db_code
      else
        next
      end
    end.compact.join(' ')
  end

  # Drops one leading T, N or M (any case) from a copy of the string, then
  # upper-cases a lone 'x' and lower-cases everything else. Works on a copy so
  # the receiver is left untouched and frozen strings are accepted.
  def clean_tnmcategory
    category = sub(/\A[tnm]/i, '')
    category =~ /\Ax\z/i ? category.upcase : category.downcase
  end
end
@@ -0,0 +1,137 @@
1
+ # encoding: utf-8
2
+ require 'active_support/core_ext/string/conversions'
3
+ require 'ndr_support/daterange'
4
+ require 'ndr_support/ourdate'
5
+ require 'ndr_support/ourtime'
6
+
7
# ParseDate is not available from Ruby 1.9.x onwards, so provide a minimal
# stand-in when nothing else has defined it. Only String#to_date uses it;
# the logic lives in its own class so it can be tested directly, because the
# behaviour of Date._parse has been known to change.
unless defined?(::ParseDate)
  class ParseDate
    class << self
      # Parses `str` with Date._parse and returns the components as
      # [year, mon, mday, hour, min, sec, zone, wday]; any component that
      # Date._parse did not report is nil. `comp` is passed straight through
      # to Date._parse.
      def parsedate(str, comp = false)
        parts = Date._parse(str, comp)
        [:year, :mon, :mday, :hour, :min, :sec, :zone, :wday].map { |field| parts[field] }
      end
    end
  end
end
19
+
20
# String extensions for phonetic matching, date / time / boolean conversion,
# and a few display helpers for names and NHS numbers.
class String
  # Consonants that carry a Soundex digit, aligned position-for-position with
  # SOUNDEX_NUMS (used together as the arguments to String#tr).
  SOUNDEX_CHARS = 'BPFVCSKGJQXZDTLMNR'
  SOUNDEX_NUMS  = '111122222222334556'
  # String#delete specs: "everything except the coded consonants" and
  # "everything except A-Z" respectively.
  SOUNDEX_CHARS_EX = '^' + SOUNDEX_CHARS
  SOUNDEX_CHARS_DEL = '^A-Z'

  # desc: http://en.wikipedia.org/wiki/Soundex
  #
  # Returns the first letter followed by digits for the remaining coded
  # consonants, padded with '0' to at least three digits. With `census` true
  # (the default) the digits are limited to three; with false they are not
  # truncated. Returns '' when the string contains no A-Z letters.
  def soundex(census = true)
    str = upcase.delete(SOUNDEX_CHARS_DEL).squeeze
    # Nothing to encode; this was the only case the former blanket
    # `rescue ''` could actually be catching.
    return '' if str.empty?

    last = census ? 2 : -1
    digits = str[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)
    str[0..0] + digits[0..last].squeeze[0..last].ljust(3, '0')
  end

  # True when both strings produce the same (census) soundex code.
  def sounds_like(other)
    soundex == other.soundex
  end

  # First date of the Daterange parsed from this string.
  def date1
    Daterange.new(self).date1
  end

  # Second date of the Daterange parsed from this string.
  def date2
    Daterange.new(self).date2
  end

  # The value parsed from this string by Ourdate.
  def thedate
    Ourdate.new(self).thedate
  end

  # The value parsed from this string by Ourtime.
  def thetime
    Ourtime.new(self).thetime
  end

  # Convert "SMITH JD" into "Smith JD"
  #
  # The last whitespace-separated word is taken to be the initials and is
  # left as-is; every other word is capitalised. Returns '' for an empty or
  # whitespace-only string (which previously raised TypeError).
  def surname_and_initials
    words = split
    return '' if words.empty?

    initials = words.pop
    words.map(&:capitalize).join(' ') + ' ' + initials
  end

  # Like titleize but copes with Scottish and Irish names.
  def surnameize
    s = slice(0, 2).upcase
    if s == 'MC' || s == "O'"
      # Titleize the prefix and the remainder separately so that the letter
      # after "Mc" / "O'" is capitalised too.
      s.titleize + slice(2..-1).titleize
    else
      titleize
    end
  end

  # Show NHS numbers with spaces
  # (3-3-4 grouping); strings that are not exactly 10 characters long are
  # returned unchanged.
  def nhs_numberize
    return self unless length == 10
    self[0..2] + ' ' + self[3..5] + ' ' + self[6..9]
  end

  # truncate a string, with a HTML &hellip; at the end
  # Strings longer than `n` keep their first n - 1 characters.
  def truncate_hellip(n)
    length > n ? slice(0, n - 1) + '&hellip;' : self
  end

  # Try to convert the string value into a date.
  # If given a pattern, use it to parse date, otherwise use default setting to parse it
  #
  # Returns '' for an empty string and nil for a blank one; otherwise the
  # result of Daterange#date1 (for dd.mm.yyyy input) or Ourdate.build_datetime.
  def to_date(pattern = nil)
    return '' if empty? # TODO: check if this is used... :/
    return nil if blank?

    pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

    if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
      # Workaround for ONS dates (with missing day / month): revert to old
      # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
      # TODO: Move all death parsing to format 'yyyymmdd_ons'
      return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
      pattern = '%Y%m%d'
    end

    if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
      return date1 # Uses Daterange to consistently parse our displayed date format
    end

    if pattern.to_s.include?('%')
      # Use Date.strptime if the pattern contains a percent sign
      parsed_date = DateTime.strptime(self, pattern)
      Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
    else
      # Use '.' rather than '/' as a separator for more consistent parsing:
      year, month, day, *_ = ParseDate.parsedate(gsub('/', '.'))

      if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
        month, day = day, month
      elsif 8 == length && self !~ /\d{8}/
        # dd/mm/yy, rather than yyyymmdd
        year, day = day, year
        # Two-digit years up to the current one belong to this century,
        # later ones to the previous century.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      elsif 9 == length
        # dd/mmm/yy, rare case.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      end

      Ourdate.build_datetime(year, month, day)
    end
  end

  # Try to convert the string value into boolean
  #
  # Returns true for true/t/yes/y/1 and false for false/f/no/n/0 (any case);
  # raises ArgumentError for anything else.
  def to_boolean
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    # (the receiver is always a String, so no comparison against true, false
    # or nil is needed here)
    return true if self =~ /^(true|t|yes|y|1)$/i
    return false if self =~ /^(false|f|no|n|0)$/i
    fail ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
@@ -0,0 +1 @@
1
+ load 'tasks/audit_code.rake'
@@ -0,0 +1,13 @@
1
class Time
  # Returns the receiver itself, without any conversion.
  #
  # Ruby 1.9 ships its own Time#to_time (in the stdlib Time extension rather
  # than in core), and that version hands back the time in the local
  # timezone. ActiveSupport carries the definition below, but only applies it
  # on Ruby 1.8.7. The 1.8.7 behaviour is kept here because local time zones
  # have caused problems with our Time#to_s format, which renders either as
  # '%d.%m.%Y %H:%M' or as '%d.%m.%Y'.
  def to_time
    self
  end
end
@@ -0,0 +1,72 @@
1
+ require 'ndr_support/utf8_encoding/control_characters'
2
+ require 'ndr_support/utf8_encoding/force_binary'
3
+ require 'ndr_support/utf8_encoding/object_support'
4
+
5
# Encoding helpers for file / rawtext handling: turning strings of unknown
# or mixed encoding into valid UTF-8.
module UTF8Encoding
  include ControlCharacters
  include ForceBinary
  include ObjectSupport

  # Signals that none of the candidate encodings produced valid UTF-8.
  class UTF8CoercionError < EncodingError; end

  # Source encodings tried, most preferred first, when the caller names none:
  AUTO_ENCODINGS = ['UTF-8', 'UTF-16', 'Windows-1252']
  # Escape applied to unmappable characters when forcing an encoding:
  # "0x" followed by the character's code point in hex (at least two digits).
  REPLACEMENT_SCHEME = lambda { |char| format('0x%02x', char.ord) }

  # Non-destructive form of #ensure_utf8!: works on a duplicate of `string`
  # and returns it, or raises UTF8CoercionError if no encoding fits.
  def ensure_utf8(string, source_encoding = nil)
    duplicate = string.dup
    ensure_utf8!(duplicate, source_encoding)
  end

  # Re-encodes `string` itself as UTF-8 and returns it.
  #
  # When `source_encoding` is given, only that encoding (or list of
  # encodings) is tried; otherwise AUTO_ENCODINGS are tried in order.
  # Raises UTF8CoercionError if the string is still not valid afterwards.
  def ensure_utf8!(string, source_encoding = nil)
    candidates = AUTO_ENCODINGS
    candidates = Array.wrap(source_encoding) if source_encoding

    # Work through the candidates in order of preference; the first that
    # leaves the string valid wins.
    apply_candidates!(string, candidates)

    return string if string.valid_encoding?

    # No candidate produced a valid string, so give up:
    fail(UTF8CoercionError, "Attempted to use: #{candidates}")
  end

  # Non-destructive form of #coerce_utf8!: works on a duplicate of `string`.
  def coerce_utf8(string, source_encoding = nil)
    duplicate = string.dup
    coerce_utf8!(duplicate, source_encoding)
  end

  # Re-encodes `string` itself as UTF-8, escaping (with REPLACEMENT_SCHEME)
  # anything that cannot be mapped rather than raising UTF8CoercionError.
  def coerce_utf8!(string, source_encoding = nil)
    begin
      # The ordinary route first...
      ensure_utf8!(string, source_encoding)
    rescue UTF8CoercionError
      # ...and failing that, treat the content as raw bytes and escape
      # whatever has no UTF-8 mapping:
      string.encode!('UTF-8', 'BINARY', :fallback => REPLACEMENT_SCHEME)
    end
  end

  private

  # Tries each of `candidates` in turn as the source encoding for an in-place
  # conversion of `string` to UTF-8. Stops at, and returns, the first one
  # that leaves `string` validly encoded; returns nil if none does.
  def apply_candidates!(string, candidates)
    candidates.each do |encoding|
      begin
        string.encode!('UTF-8', encoding)
        # Conversion went through; accept it only if the result is valid.
        return encoding if string.valid_encoding?
      rescue EncodingError
        # Conversion failed outright; try the next candidate.
        next
      end
    end
    nil
  end
end
@@ -0,0 +1,53 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
module UTF8Encoding
  # Escapes control characters found in strings, and in strings nested inside
  # hashes and arrays, using UTF8Encoding::REPLACEMENT_SCHEME.
  module ControlCharacters
    # Characters that are candidates for escaping (\x00-\x1f, plus \x7f):
    CONTROL_CHARACTERS = /[\x00-\x1f]|\x7f/
    # Candidates that are left alone: tab, line feed and carriage return.
    ALLOWED_CONTROL_CHARACTERS = %W( \x09 \x0a \x0d )

    # Escapes control characters throughout `object`, in place, descending
    # into hash values and array elements. Objects of any other type are
    # returned untouched.
    def escape_control_chars_in_object!(object)
      case object
      when String then escape_control_chars!(object)
      when Hash   then escape_control_chars_in_hash!(object)
      when Array  then escape_control_chars_in_array!(object)
      else object
      end
    end

    # Non-destructive form of #escape_control_chars!: escapes a duplicate of
    # `string` and returns it.
    def escape_control_chars(string)
      duplicate = string.dup
      escape_control_chars!(duplicate)
    end

    # Replaces, within `string` itself, every disallowed control character
    # with its escaped form. Always returns `string`, whether or not anything
    # was replaced.
    def escape_control_chars!(string)
      string.gsub!(CONTROL_CHARACTERS) do |character|
        # Permitted control characters are substituted with themselves.
        next character if ALLOWED_CONTROL_CHARACTERS.include?(character)

        UTF8Encoding::REPLACEMENT_SCHEME.call(character)
      end
      string
    end

    # Escapes control characters in each value of `hash` (keys are not
    # touched). Returns `hash`.
    def escape_control_chars_in_hash!(hash)
      hash.each_value(&method(:escape_control_chars_in_object!))
    end

    # Escapes control characters in each element of `array`. Returns `array`.
    def escape_control_chars_in_array!(array)
      array.each(&method(:escape_control_chars_in_object!))
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
module UTF8Encoding
  # Allows any supported object to have any high-ascii string
  # content to be force-encoded from UTF-8 to BINARY (/ASCII-8BIT).
  # This ensures that any serialisation to YAML, using Psych,
  # can be stored in other encodings. (Psych by default emits
  # UTF-8 YAML, which might not survive being stored in a Windows-1252
  # database, for example.)
  module ForceBinary
    # Recursively ensure the correct encoding is being used.
    #
    # Strings are returned as copies (BINARY-tagged if they contain any byte
    # above 127); hashes and arrays are rebuilt with their values / elements
    # converted; any other object is returned as-is.
    def binary_encode_any_high_ascii(object)
      case object
      when String
        binary_encode_if_any_high_ascii(object)
      when Hash
        binary_encode_any_high_ascii_in_hash(object)
      when Array
        binary_encode_any_high_ascii_in_array(object)
      else
        object
      end
    end

    private

    # Returns a BINARY-encoded version of `string`, if it cannot be
    # represented as 7bit ASCII; otherwise a UTF-8 copy of it.
    def binary_encode_if_any_high_ascii(string)
      # ensure_utf8 (provided by the including UTF8Encoding module) returns a
      # fresh string, so the caller's string is never re-tagged.
      string = ensure_utf8(string)
      # ascii_only? answers "any byte above 127?" without building an array
      # of every byte in the string.
      string.force_encoding('BINARY') unless string.ascii_only?
      string
    end

    # Ensures all values of the given `hash` are BINARY-encoded, if necessary.
    # Keys are carried over unchanged; a new Hash is returned.
    def binary_encode_any_high_ascii_in_hash(hash)
      Hash[hash.map { |key, value| [key, binary_encode_any_high_ascii(value)] }]
    end

    # Ensures all values of the given `array` are BINARY-encoded, if necessary.
    # A new Array is returned.
    def binary_encode_any_high_ascii_in_array(array)
      array.map { |element| binary_encode_any_high_ascii(element) }
    end
  end
end