ndr_support 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +91 -0
  11. data/Rakefile +12 -0
  12. data/code_safety.yml +258 -0
  13. data/gemfiles/Gemfile.rails32 +6 -0
  14. data/gemfiles/Gemfile.rails32.lock +108 -0
  15. data/gemfiles/Gemfile.rails41 +6 -0
  16. data/gemfiles/Gemfile.rails41.lock +111 -0
  17. data/gemfiles/Gemfile.rails42 +6 -0
  18. data/gemfiles/Gemfile.rails42.lock +111 -0
  19. data/lib/ndr_support.rb +21 -0
  20. data/lib/ndr_support/array.rb +52 -0
  21. data/lib/ndr_support/concerns/working_days.rb +94 -0
  22. data/lib/ndr_support/date_and_time_extensions.rb +103 -0
  23. data/lib/ndr_support/daterange.rb +196 -0
  24. data/lib/ndr_support/fixnum/calculations.rb +15 -0
  25. data/lib/ndr_support/fixnum/julian_date_conversions.rb +14 -0
  26. data/lib/ndr_support/hash.rb +52 -0
  27. data/lib/ndr_support/integer.rb +12 -0
  28. data/lib/ndr_support/nil.rb +38 -0
  29. data/lib/ndr_support/ourdate.rb +97 -0
  30. data/lib/ndr_support/ourtime.rb +51 -0
  31. data/lib/ndr_support/regexp_range.rb +65 -0
  32. data/lib/ndr_support/safe_file.rb +185 -0
  33. data/lib/ndr_support/safe_path.rb +268 -0
  34. data/lib/ndr_support/string/cleaning.rb +136 -0
  35. data/lib/ndr_support/string/conversions.rb +137 -0
  36. data/lib/ndr_support/tasks.rb +1 -0
  37. data/lib/ndr_support/time/conversions.rb +13 -0
  38. data/lib/ndr_support/utf8_encoding.rb +72 -0
  39. data/lib/ndr_support/utf8_encoding/control_characters.rb +53 -0
  40. data/lib/ndr_support/utf8_encoding/force_binary.rb +44 -0
  41. data/lib/ndr_support/utf8_encoding/object_support.rb +31 -0
  42. data/lib/ndr_support/version.rb +5 -0
  43. data/lib/ndr_support/yaml/serialization_migration.rb +65 -0
  44. data/lib/tasks/audit_code.rake +423 -0
  45. data/ndr_support.gemspec +39 -0
  46. data/test/array_test.rb +20 -0
  47. data/test/concerns/working_days_test.rb +122 -0
  48. data/test/daterange_test.rb +194 -0
  49. data/test/fixnum/calculations_test.rb +28 -0
  50. data/test/hash_test.rb +84 -0
  51. data/test/integer_test.rb +14 -0
  52. data/test/nil_test.rb +40 -0
  53. data/test/ourdate_test.rb +27 -0
  54. data/test/ourtime_test.rb +27 -0
  55. data/test/regexp_range_test.rb +135 -0
  56. data/test/resources/filesystem_paths.yml +37 -0
  57. data/test/safe_file_test.rb +597 -0
  58. data/test/safe_path_test.rb +168 -0
  59. data/test/string/cleaning_test.rb +176 -0
  60. data/test/string/conversions_test.rb +353 -0
  61. data/test/test_helper.rb +41 -0
  62. data/test/time/conversions_test.rb +15 -0
  63. data/test/utf8_encoding/control_characters_test.rb +84 -0
  64. data/test/utf8_encoding/force_binary_test.rb +64 -0
  65. data/test/utf8_encoding_test.rb +170 -0
  66. data/test/yaml/serialization_test.rb +145 -0
  67. metadata +295 -0
@@ -0,0 +1,136 @@
1
# Core extensions for tidying free-text values (postcodes, NHS numbers, names,
# clinical codes, ...) into a consistent form.
class String
  # Control characters removed by #strip_xml_unsafe_characters: everything
  # below 0x20 except tab (0x09), line feed (0x0a) and carriage return (0x0d).
  INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
  # Lookup used by clean(:roman5); keys are upper-case roman numerals.
  ROMAN_ONE_TO_FIVE_MAPPING = {
    'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5'
  }.freeze

  # Used for comparing addresses: upper-cases the string and drops every
  # character that is not A-Z or 0-9.
  def squash
    upcase.delete('^A-Z0-9')
  end

  # Show postcode in various formats.
  # Parameter "option" can be :user, :compact, :db
  #
  # Returns the receiver itself (unchanged) when the value is non-blank and
  # does not end in a recognised postcode pattern; otherwise returns a new,
  # upper-cased string formatted according to `option`.
  def postcodeize(option = :user)
    nspce = delete(' ').upcase
    unless nspce.blank? || /([A-Z][0-9]|[A-Z][0-9][0-9]|[A-Z][0-9][A-Z]|[A-Z][A-Z][0-9]|[A-Z][A-Z][0-9][0-9]|[A-Z][A-Z][0-9][A-Z])[0-9][A-Z][A-Z]$/ =~ nspce
      return self # Don't change old-style or malformed postcodes
    end

    case option
    when :compact
      nspce
    when :db
      # NOTE(review): the 5- and 6-character branches are identical here;
      # confirm whether 5-character postcodes were meant to be padded differently.
      case nspce.length
      when 5 then nspce.insert(-4, ' ')
      when 6 then nspce.insert(-4, ' ')
      else nspce
      end
    else # anything else, including :user --> friendly format
      nspce.length < 5 ? nspce : nspce.insert(-4, ' ')
    end
  end

  # Returns a cleaned copy of the string; `what` (a Symbol) selects the rule
  # set. The receiver is never modified. Unrecognised values of `what` fall
  # through to the final `else` branch.
  def clean(what)
    case what
    when :nhsnumber
      # Digits only, truncated to the first ten.
      delete('^0-9')[0..9]
    when :postcode, :get_postcode
      postcodeize(:db)
    when :lpi
      upcase.delete('^0-9A-Z')
    when :sex
      # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
      # NOTE(review): /^M|1/ matches a leading "M" *or* a "1" anywhere in the
      # string (likewise for F/2) - confirm that this is intended.
      if self =~ /^M|1/i
        '1'
      elsif self =~ /^F|2/i
        '2'
      else
        '0'
      end
    when :sex_c
      if self =~ /^M|1/i
        'M'
      elsif self =~ /^F|2/i
        'F'
      else
        ''
      end
    when :name
      # Applied in order: drop full stops, turn commas / semicolons into
      # spaces, collapse runs of whitespace, replace backticks with apostrophes.
      substitutions = {
        '.' => '',
        /,|;/ => ' ',
        /\s{2,}/ => ' ',
        '`' => '\''
      }
      substitutions.inject(upcase) { |str, scheme| str.gsub(*scheme) }.strip
    when :ethniccategory
      replace_ethniccategory = {
        '0' => '0',
        '1' => 'M',
        '2' => 'N',
        '3' => 'H',
        '4' => 'J',
        '5' => 'K',
        '6' => 'R',
        '7' => '8',
        '&' => 'X',
        ' ' => 'X',
        '99' => 'X'
      }
      replace_ethniccategory[self] || upcase
    when :code
      # Split on space / comma / semicolon, drop blanks, remove full stops.
      split(/ |,|;/).reject(&:blank?).map { |code| code.delete('.') }.join(' ')
    when :code_icd
      clean_code_icd
    when :code_opcs
      clean_code_opcs
    when :hospitalnumber
      # Drop the final character unless it is a digit.
      self[-1..-1] =~ /\d/ ? self : self[0..-2]
    when :xmlsafe, :make_xml_safe
      strip_xml_unsafe_characters
    when :roman5
      # This deromanises roman numerals between 1 and 5. Runs of I/V that are
      # not in the mapping (e.g. "VI") are kept as-is rather than deleted.
      gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING.fetch(match.upcase, match) }
    when :tnmcategory
      # Non-destructive: strip one leading T/N/M from a copy, so the receiver
      # is left untouched and frozen strings are accepted.
      category = sub(/\A[tnm]/i, '')
      category =~ /\Ax\z/i ? category.upcase : category.downcase
    else
      # NOTE(review): the pattern is a plain String, so this replaces the
      # literal two-character sequence space + "?" - confirm that is intended.
      gsub(' ?', ' ')
    end
  end

  # Returns a copy of the string with INVALID_CONTROL_CHARS removed.
  def strip_xml_unsafe_characters
    gsub(String::INVALID_CONTROL_CHARS, '')
  end

  # Truthy (the index of the first match) if the string contains any of
  # INVALID_CONTROL_CHARS, otherwise nil.
  def xml_unsafe?
    self =~ String::INVALID_CONTROL_CHARS
  end

  private

  # Upper-cases and splits on space / comma / semicolon, then glues any
  # stand-alone "D" or "A" token onto the code that precedes it. A leading
  # "D" / "A" with nothing before it is kept as a token of its own.
  def clean_code_icd
    codes = upcase.split(/ |,|;/).reject { |code| code.squash.blank? }
    cleaned_codes = []
    codes.each do |code|
      if (code == 'D' || code == 'A') && !cleaned_codes.empty?
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end
    cleaned_codes.join(' ')
  end

  # Squashes each space / comma / semicolon separated token, keeping only
  # those that are four characters long or contain CZ001 / CZ002.
  def clean_code_opcs
    split(/ |,|;/).map(&:squash).select do |db_code|
      4 == db_code.length || db_code =~ /CZ00[12]/
    end.join(' ')
  end
end
@@ -0,0 +1,137 @@
1
+ # encoding: utf-8
2
+ require 'active_support/core_ext/string/conversions'
3
+ require 'ndr_support/daterange'
4
+ require 'ndr_support/ourdate'
5
+ require 'ndr_support/ourtime'
6
+
7
+ # Forward-port ParseDate to Ruby 1.9.x and beyond.
8
+ # We only use this in String#to_date, but keep the logic
9
+ # encapsulated for testing purposes - the behaviour of
10
+ # Date._parse has been known to change.
11
unless defined?(::ParseDate)
  # Stand-in for the ParseDate library, defined only when no ::ParseDate
  # constant already exists.
  class ParseDate
    class << self
      # Parses `str` with Date._parse (passing `comp` straight through) and
      # returns an eight-element array:
      #   [year, mon, mday, hour, min, sec, zone, wday]
      # Components that Date._parse did not find are nil.
      def parsedate(str, comp = false)
        components = Date._parse(str, comp)
        components.values_at(:year, :mon, :mday, :hour, :min, :sec, :zone, :wday)
      end
    end
  end
end
19
+
20
# Core extensions for converting strings into other representations
# (soundex codes, dates / times, display formats, booleans).
class String
  # Consonants that carry a soundex digit, and (position for position) the
  # digit each one maps to.
  SOUNDEX_CHARS = 'BPFVCSKGJQXZDTLMNR'.freeze
  SOUNDEX_NUMS  = '111122222222334556'.freeze
  # String#delete specs: "everything except the soundex consonants" and
  # "everything except A-Z".
  SOUNDEX_CHARS_EX = ('^' + SOUNDEX_CHARS).freeze
  SOUNDEX_CHARS_DEL = '^A-Z'.freeze

  # desc: http://en.wikipedia.org/wiki/Soundex
  #
  # Returns the first letter followed by at least three digits (zero padded).
  # With `census` true (the default) the digit part is limited to three
  # characters; with false it is left unlimited. Returns '' when the string
  # contains no letters.
  #
  # NOTE(review): the digit string is truncated *before* adjacent duplicates
  # are squeezed, so fewer than three significant digits can survive - confirm
  # whether that ordering is intended.
  def soundex(census = true)
    str = upcase.delete(SOUNDEX_CHARS_DEL).squeeze
    return '' if str.empty? # nothing to encode

    limit = census ? 2 : -1
    digits = str[1..-1].
             delete(SOUNDEX_CHARS_EX).
             tr(SOUNDEX_CHARS, SOUNDEX_NUMS)[0..limit].
             squeeze[0..limit]

    str[0..0] + digits.ljust(3, '0')
  rescue StandardError
    # Best-effort, as before: any failure yields an empty code.
    ''
  end

  # True if both strings have the same (census) soundex code.
  def sounds_like(other)
    soundex == other.soundex
  end

  # First date of the Daterange built from this string.
  def date1
    Daterange.new(self).date1
  end

  # Second date of the Daterange built from this string.
  def date2
    Daterange.new(self).date2
  end

  # The date held by an Ourdate built from this string.
  def thedate
    Ourdate.new(self).thedate
  end

  # The time held by an Ourtime built from this string.
  def thetime
    Ourtime.new(self).thetime
  end

  # Convert "SMITH JD" into "Smith JD"
  #
  # The last whitespace-separated word is treated as the initials and left
  # untouched; every preceding word is capitalised. Returns '' for an empty
  # or whitespace-only string (there are no initials to append).
  def surname_and_initials
    words = split
    initials = words.pop
    return '' if initials.nil?

    words.map(&:capitalize).join(' ') + ' ' + initials
  end

  # Like titleize but copes with Scottish and Irish names.
  # A leading "Mc" / "O'" prefix is titleized separately from the remainder.
  def surnameize
    prefix = slice(0, 2).upcase
    if prefix == 'MC' || prefix == "O'"
      prefix.titleize + slice(2..-1).titleize
    else
      titleize
    end
  end

  # Show NHS numbers with spaces (3-3-4 grouping).
  # Strings that are not exactly ten characters long are returned as-is.
  def nhs_numberize
    return self unless length == 10

    "#{self[0..2]} #{self[3..5]} #{self[6..9]}"
  end

  # truncate a string, with a HTML &hellip; at the end
  # Strings longer than `n` keep their first n - 1 characters.
  def truncate_hellip(n)
    length > n ? slice(0, n - 1) + '&hellip;' : self
  end

  # Try to convert the string value into a date.
  # If given a pattern, use it to parse date, otherwise use default setting to parse it
  #
  # Returns '' for an empty string and nil for a blank one; otherwise the
  # result of Daterange#date1 or Ourdate.build_datetime.
  def to_date(pattern = nil)
    return '' if empty? # TODO: check if this is used... :/
    return nil if blank?

    pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

    if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
      # Workaround for ONS dates (with missing day / month): revert to old
      # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
      # TODO: Move all death parsing to format 'yyyymmdd_ons'
      return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
      pattern = '%Y%m%d'
    end

    if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
      return date1 # Uses Daterange to consistently parse our displayed date format
    end

    if pattern.to_s.include?('%')
      # Use DateTime.strptime if the pattern contains a percent sign
      parsed_date = DateTime.strptime(self, pattern)
      Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
    else
      # Use '.' rather than '/' as a separator for more consistent parsing:
      year, month, day = ParseDate.parsedate(gsub('/', '.'))

      if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
        month, day = day, month
      elsif 8 == length && self !~ /\d{8}/
        # dd/mm/yy, rather than yyyymmdd
        year, day = day, year
        # Two-digit years up to the current one are taken to be in the 2000s.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      elsif 9 == length
        # dd/mmm/yy, rare case.
        year += 100 if year <= Ourdate.today.year % 100
        year += 1900
      end

      Ourdate.build_datetime(year, month, day)
    end
  end

  # Try to convert the string value into boolean
  #
  # Accepts true/t/yes/y/1 and false/f/no/n/0 (case-insensitive); raises
  # ArgumentError for anything else. The receiver is always a String, so it
  # is not compared against true / false / nil.
  def to_boolean
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    return true if self =~ (/^(true|t|yes|y|1)$/i)
    return false if self =~ (/^(false|f|no|n|0)$/i)
    fail ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
@@ -0,0 +1 @@
1
+ load 'tasks/audit_code.rake'
@@ -0,0 +1,13 @@
1
+ class Time
2
+ # Returns the receiver itself, rather than a converted copy.
3
+ # Ruby 1.9 defines Time#to_time natively (as part of the
4
+ # stdlib Time, rather than core Time), but that version returns
5
+ # the time in the local timezone. ActiveSupport contains
6
+ # this same definition, but only actually uses it on
7
+ # Ruby 1.8.7. We wish to keep that behaviour, as local time
8
+ # zones have caused problems with our Time#to_s format
9
+ # (which formats as either '%d.%m.%Y %H:%M' or '%d.%m.%Y').
10
+ def to_time
11
+ self
12
+ end
13
+ end
@@ -0,0 +1,72 @@
1
+ require 'ndr_support/utf8_encoding/control_characters'
2
+ require 'ndr_support/utf8_encoding/force_binary'
3
+ require 'ndr_support/utf8_encoding/object_support'
4
+
5
# Provides encoding support to be used for file / rawtext handling.
module UTF8Encoding
  include ControlCharacters
  include ForceBinary
  include ObjectSupport

  # Raised when we cannot ensure a string is valid UTF-8
  class UTF8CoercionError < EncodingError; end

  # Our known source encodings, in order of preference:
  AUTO_ENCODINGS = %w( UTF-8 UTF-16 Windows-1252 ).freeze
  # How should unmappable characters be escaped, when forcing encoding?
  # Each character becomes '0x' plus its (at least two digit) hex code point.
  REPLACEMENT_SCHEME = lambda { |char| '0x' + char.ord.to_s(16).rjust(2, '0') }

  # Returns a new string with valid UTF-8 encoding,
  # or raises an exception if encoding fails.
  # `string` itself is left untouched; see #ensure_utf8! for the parameters.
  def ensure_utf8(string, source_encoding = nil)
    ensure_utf8!(string.dup, source_encoding)
  end

  # Attempts to encode `string` to UTF-8, in place.
  # Returns `string`, or raises UTF8CoercionError.
  #
  # `source_encoding` may be a single encoding or a list of them; when it is
  # omitted, each of AUTO_ENCODINGS is tried in turn.
  def ensure_utf8!(string, source_encoding = nil)
    # A list of encodings we should try from. Kernel#Array wraps a single
    # encoding and passes an array through unchanged.
    candidates = source_encoding ? Array(source_encoding) : AUTO_ENCODINGS

    # Attempt to coerce the string to UTF-8, from one of the source
    # candidates (in order of preference):
    apply_candidates!(string, candidates)

    # NOTE(review): this checks validity in whatever encoding the string is
    # tagged with at this point - confirm that is always UTF-8 when no
    # candidate succeeded.
    return string if string.valid_encoding?

    # None of our candidate source encodings worked, so fail:
    fail(UTF8CoercionError, "Attempted to use: #{candidates}")
  end

  # Returns a UTF-8 version of `string`, escaping any unmappable characters.
  # `string` itself is left untouched.
  def coerce_utf8(string, source_encoding = nil)
    coerce_utf8!(string.dup, source_encoding)
  end

  # Coerces `string` to UTF-8, in place, escaping any unmappable characters.
  def coerce_utf8!(string, source_encoding = nil)
    # Try normally first...
    ensure_utf8!(string, source_encoding)
  rescue UTF8CoercionError
    # ...before going back-to-basics, and replacing things that don't map:
    string.encode!('UTF-8', 'BINARY', :fallback => REPLACEMENT_SCHEME)
  end

  private

  # Tries each of `candidates` as the source encoding for `string`, stopping
  # at the first that leaves it validly encoded. Returns that candidate, or
  # nil if none worked.
  def apply_candidates!(string, candidates)
    candidates.detect do |encoding|
      begin
        # Attempt to encode as UTF-8 from source `encoding`:
        string.encode!('UTF-8', encoding)
        # If that worked, we're done; otherwise, move on.
        string.valid_encoding?
      rescue EncodingError
        # If that failed really badly, move on:
        false
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
module UTF8Encoding
  # Allows any supported object to have control characters
  # escaped, using standard replacement scheme.
  module ControlCharacters
    # The range of characters we consider:
    CONTROL_CHARACTERS = /[\x00-\x1f]|\x7f/
    # Exceptions that are allowed (tab, line feed, carriage return):
    ALLOWED_CONTROL_CHARACTERS = %W( \x09 \x0a \x0d ).freeze

    # Recursively escape any control characters in `object`.
    # Strings, hashes and arrays are modified in place; any other object is
    # returned untouched.
    def escape_control_chars_in_object!(object)
      case object
      when String
        escape_control_chars!(object)
      when Hash
        escape_control_chars_in_hash!(object)
      when Array
        escape_control_chars_in_array!(object)
      else
        object
      end
    end

    # Returns a copy of `string`, with any control characters escaped.
    def escape_control_chars(string)
      escape_control_chars!(string.dup)
    end

    # Escapes in-place any control characters in `string`, before returning it.
    # Allowed control characters are kept; every other match is replaced by
    # the output of UTF8Encoding::REPLACEMENT_SCHEME.
    def escape_control_chars!(string)
      string.gsub!(CONTROL_CHARACTERS) do |character|
        next character if ALLOWED_CONTROL_CHARACTERS.include?(character)

        UTF8Encoding::REPLACEMENT_SCHEME[character]
      end
      # gsub! returns nil when nothing matched, so return the string explicitly:
      string
    end

    # Escape control characters in values of the given `hash`.
    # Keys are not touched.
    def escape_control_chars_in_hash!(hash)
      hash.each_value { |value| escape_control_chars_in_object!(value) }
    end

    # Escape control characters in elements of the given `array`.
    def escape_control_chars_in_array!(array)
      array.each { |element| escape_control_chars_in_object!(element) }
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'ndr_support/utf8_encoding'
2
+
3
module UTF8Encoding
  # Allows any supported object to have any high-ascii string
  # content to be force-encoded from UTF-8 to BINARY (/ASCII-8BIT).
  # This ensures that any serialisation to YAML, using Psych,
  # can be stored in other encodings. (Psych by default emits
  # UTF-8 YAML, which might not survive being stored in a Windows-1252
  # database, for example.)
  module ForceBinary
    # Recursively ensure the correct encoding is being used.
    # Strings, hashes and arrays come back as new objects; anything else is
    # returned as given.
    def binary_encode_any_high_ascii(object)
      case object
      when String then binary_encode_if_any_high_ascii(object)
      when Hash   then binary_encode_any_high_ascii_in_hash(object)
      when Array  then binary_encode_any_high_ascii_in_array(object)
      else object
      end
    end

    private

    # Returns a BINARY-encoded version of `string`, if it cannot be
    # represented as 7bit ASCII; otherwise the UTF-8 copy from `ensure_utf8`.
    def binary_encode_if_any_high_ascii(string)
      utf8 = ensure_utf8(string)
      has_high_byte = utf8.bytes.any? { |byte| byte > 127 }
      utf8.force_encoding('BINARY') if has_high_byte
      utf8
    end

    # Builds a new hash with the same keys, whose values are BINARY-encoded
    # where necessary.
    def binary_encode_any_high_ascii_in_hash(hash)
      hash.each_with_object({}) do |(key, value), encoded|
        encoded[key] = binary_encode_any_high_ascii(value)
      end
    end

    # Builds a new array whose elements are BINARY-encoded where necessary.
    def binary_encode_any_high_ascii_in_array(array)
      array.map { |element| binary_encode_any_high_ascii(element) }
    end
  end
end