ndr_support 5.10.1 → 5.10.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -1
- data/code_safety.yml +12 -8
- data/lib/ndr_support/string/clean_methodable.rb +151 -0
- data/lib/ndr_support/string/cleaning.rb +7 -114
- data/lib/ndr_support/version.rb +1 -1
- data/test/string/cleaning_test.rb +21 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f9ac10bd29787e3ff5b382618cc9389e09c1f5c1b3684a5083ca3babaa0fe99
|
4
|
+
data.tar.gz: d7b0273dee82913349ecc33fbd87310749e4190912ee5349000402e89e0e3e2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 746812be4b4685cd841e7ba69eb56fa81e8f34d5d62fc5c4db1fbfdfe88dd06052df35429dcc927d43c6c1fe5b383768eec751d8c191d84283bb15737bb06138
|
7
|
+
data.tar.gz: 7fa1f87906e1701528f5d83268d60a664e053dd200341722c24e488eb2af4dc83ccd6d91014ecc5c893176afc71ea64510ea7e34708265b3b472432f8d2da225
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
## [Unreleased]
|
2
|
-
*
|
2
|
+
* no unreleased changes
|
3
|
+
|
4
|
+
## 5.10.3 / 2024-02-12
|
5
|
+
## Fixed
|
6
|
+
* Fixed issue with constant in wrong file/concern
|
7
|
+
* Added tests for roman5 cleaning method
|
8
|
+
* Separated log10 tests
|
9
|
+
|
10
|
+
## 5.10.2 / 2024-02-09
|
11
|
+
## Fixed
|
12
|
+
* Refactored the cleaning code to fix RuboCop issues
|
13
|
+
|
14
|
+
## Added
|
15
|
+
* Added a new 'log10' cleaning method
|
3
16
|
|
4
17
|
## 5.10.1 / 2024-01-04
|
5
18
|
## Fixed
|
data/code_safety.yml
CHANGED
@@ -22,8 +22,8 @@ file safety:
|
|
22
22
|
safe_revision: 8796a1a32273a106a6b1a134a48077cff0176659
|
23
23
|
CHANGELOG.md:
|
24
24
|
comments:
|
25
|
-
reviewed_by:
|
26
|
-
safe_revision:
|
25
|
+
reviewed_by: ollietulloch
|
26
|
+
safe_revision: 8cb498126d68a19f880b0cd1b83b127c8ec90a89
|
27
27
|
CODE_OF_CONDUCT.md:
|
28
28
|
comments:
|
29
29
|
reviewed_by: timgentry
|
@@ -136,10 +136,14 @@ file safety:
|
|
136
136
|
comments:
|
137
137
|
reviewed_by: brian.shand
|
138
138
|
safe_revision: f4f7cb0803ea34a2f1ba83495d8bcbd942786bce
|
139
|
+
lib/ndr_support/string/clean_methodable.rb:
|
140
|
+
comments:
|
141
|
+
reviewed_by: ollietulloch
|
142
|
+
safe_revision: ff21f98e6608eedb4b33f977778736b38f9fbacb
|
139
143
|
lib/ndr_support/string/cleaning.rb:
|
140
144
|
comments:
|
141
|
-
reviewed_by:
|
142
|
-
safe_revision:
|
145
|
+
reviewed_by: ollietulloch
|
146
|
+
safe_revision: d99c4bf5d2cbbfe78101beff399e7da8645417b2
|
143
147
|
lib/ndr_support/string/conversions.rb:
|
144
148
|
comments:
|
145
149
|
reviewed_by: brian.shand
|
@@ -170,8 +174,8 @@ file safety:
|
|
170
174
|
safe_revision: '09b3f97fb4c1afc88d0d070c39796b1f60c39d25'
|
171
175
|
lib/ndr_support/version.rb:
|
172
176
|
comments:
|
173
|
-
reviewed_by:
|
174
|
-
safe_revision:
|
177
|
+
reviewed_by: ollietulloch
|
178
|
+
safe_revision: 8cb498126d68a19f880b0cd1b83b127c8ec90a89
|
175
179
|
lib/ndr_support/working_days.rb:
|
176
180
|
comments:
|
177
181
|
reviewed_by: josh.pencheon
|
@@ -254,8 +258,8 @@ file safety:
|
|
254
258
|
safe_revision: b562d0c15ff1b1d565522a47e6bae47ea09706f0
|
255
259
|
test/string/cleaning_test.rb:
|
256
260
|
comments:
|
257
|
-
reviewed_by:
|
258
|
-
safe_revision:
|
261
|
+
reviewed_by: ollietulloch
|
262
|
+
safe_revision: d99c4bf5d2cbbfe78101beff399e7da8645417b2
|
259
263
|
test/string/conversions_test.rb:
|
260
264
|
comments:
|
261
265
|
reviewed_by: brian.shand
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# Adds the 'clean' method to String, which can be used to clean strings in various ways
|
2
|
+
# depending on the contents
|
3
|
+
module CleanMethodable
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
|
6
|
+
ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }.freeze
|
7
|
+
|
8
|
+
CLEAN_METHODS = {
|
9
|
+
nhsnumber: :clean_nhsnumber,
|
10
|
+
postcode: :clean_postcode, get_postcode: :clean_postcode,
|
11
|
+
lpi: :clean_lpi,
|
12
|
+
gender: :clean_gender, sex: :clean_sex, sex_c: :clean_sex_c,
|
13
|
+
name: :clean_name,
|
14
|
+
ethniccategory: :clean_ethniccategory,
|
15
|
+
code: :clean_code, code_icd: :clean_code_icd, icd: :clean_icd,
|
16
|
+
code_opcs: :clean_code_opcs,
|
17
|
+
hospitalnumber: :clean_hospitalnumber,
|
18
|
+
xmlsafe: :clean_xmlsafe, make_xml_safe: :clean_xmlsafe,
|
19
|
+
roman5: :clean_roman5,
|
20
|
+
tnmcategory: :clean_tnmcategory,
|
21
|
+
strip: :strip, upcase: :upcase, itself: :itself,
|
22
|
+
log10: :clean_log10
|
23
|
+
}.freeze
|
24
|
+
|
25
|
+
def clean(what)
|
26
|
+
cleaning_method = CLEAN_METHODS[what]
|
27
|
+
return send(cleaning_method) if cleaning_method
|
28
|
+
|
29
|
+
gsub(' ?', ' ')
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def clean_nhsnumber
|
35
|
+
delete('^0-9')[0..9]
|
36
|
+
end
|
37
|
+
|
38
|
+
def clean_postcode
|
39
|
+
postcodeize(:db)
|
40
|
+
end
|
41
|
+
|
42
|
+
def clean_lpi
|
43
|
+
upcase.delete('^0-9A-Z')
|
44
|
+
end
|
45
|
+
|
46
|
+
def clean_gender
|
47
|
+
return '1' if self =~ /\AM(ale)?/i
|
48
|
+
return '2' if self =~ /\AF(emale)?/i
|
49
|
+
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
def clean_sex
|
54
|
+
# SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
|
55
|
+
return '1' if self =~ /^M|1/i
|
56
|
+
return '2' if self =~ /^F|2/i
|
57
|
+
|
58
|
+
'0'
|
59
|
+
end
|
60
|
+
|
61
|
+
def clean_sex_c
|
62
|
+
return 'M' if self =~ /^M|1/i
|
63
|
+
return 'F' if self =~ /^F|2/i
|
64
|
+
|
65
|
+
''
|
66
|
+
end
|
67
|
+
|
68
|
+
def clean_name
|
69
|
+
substitutions = {
|
70
|
+
'.' => '',
|
71
|
+
/,|;/ => ' ',
|
72
|
+
/\s{2,}/ => ' ',
|
73
|
+
'`' => '\''
|
74
|
+
}
|
75
|
+
substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
|
76
|
+
end
|
77
|
+
|
78
|
+
def clean_ethniccategory
|
79
|
+
replace_ethniccategory = {
|
80
|
+
'0' => '0', '1' => 'M', '2' => 'N',
|
81
|
+
'3' => 'H', '4' => 'J', '5' => 'K',
|
82
|
+
'6' => 'R', '7' => '8', '&' => 'X',
|
83
|
+
' ' => 'X', '99' => 'X'
|
84
|
+
}
|
85
|
+
replace_ethniccategory[self] || upcase
|
86
|
+
end
|
87
|
+
|
88
|
+
def clean_code
|
89
|
+
split_on_separators.map do |code|
|
90
|
+
code.blank? ? next : code.delete('.')
|
91
|
+
end.compact.join(' ')
|
92
|
+
end
|
93
|
+
|
94
|
+
def clean_code_icd
|
95
|
+
warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
|
96
|
+
# regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
|
97
|
+
codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
|
98
|
+
cleaned_codes = []
|
99
|
+
codes.each do |code|
|
100
|
+
if %w[A D].include?(code)
|
101
|
+
cleaned_codes[-1] += code
|
102
|
+
else
|
103
|
+
cleaned_codes << code
|
104
|
+
end
|
105
|
+
end
|
106
|
+
cleaned_codes.join(' ')
|
107
|
+
end
|
108
|
+
|
109
|
+
def clean_icd
|
110
|
+
codes = upcase.squish.split_on_separators.reject(&:blank?)
|
111
|
+
codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
|
112
|
+
end
|
113
|
+
|
114
|
+
def clean_hospitalnumber
|
115
|
+
self[-1..] =~ /\d/ ? self : self[0..-2]
|
116
|
+
end
|
117
|
+
|
118
|
+
def clean_xmlsafe
|
119
|
+
strip_xml_unsafe_characters
|
120
|
+
end
|
121
|
+
|
122
|
+
def clean_roman5
|
123
|
+
# This deromanises roman numerals between 1 and 5
|
124
|
+
gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
|
125
|
+
end
|
126
|
+
|
127
|
+
def clean_tnmcategory
|
128
|
+
sub!(/\A[tnm]/i, '')
|
129
|
+
if self =~ /\Ax\z/i
|
130
|
+
upcase
|
131
|
+
else
|
132
|
+
downcase
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def clean_code_opcs
|
137
|
+
split_on_separators.map do |code|
|
138
|
+
db_code = code.squash
|
139
|
+
next unless 4 == db_code.length || db_code =~ /CZ00[12]/
|
140
|
+
|
141
|
+
db_code
|
142
|
+
end.compact.join(' ')
|
143
|
+
end
|
144
|
+
|
145
|
+
def clean_log10
|
146
|
+
f_value = Float(self, exception: false)
|
147
|
+
return self if f_value.nil? || f_value.negative?
|
148
|
+
|
149
|
+
f_value.zero? ? '0.0' : Math.log10(f_value).to_s
|
150
|
+
end
|
151
|
+
end
|
@@ -1,8 +1,12 @@
|
|
1
1
|
require 'active_support/core_ext/string/filters'
|
2
|
+
require 'ndr_support/string/clean_methodable'
|
2
3
|
|
4
|
+
# Extends String clean with various methods of cleaning strings
|
5
|
+
# and polishing them
|
3
6
|
class String
|
7
|
+
include CleanMethodable
|
8
|
+
|
4
9
|
INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
|
5
|
-
ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }
|
6
10
|
|
7
11
|
POSTCODE_REGEXP = /
|
8
12
|
^(
|
@@ -26,9 +30,8 @@ class String
|
|
26
30
|
# Parameter "option" can be :user, :compact, :db
|
27
31
|
def postcodeize(option = :user)
|
28
32
|
nspce = gsub(/[[:space:]]/, '').upcase
|
29
|
-
unless nspce.blank? || POSTCODE_REGEXP =~ nspce
|
30
|
-
|
31
|
-
end
|
33
|
+
return self unless nspce.blank? || POSTCODE_REGEXP =~ nspce # Don't change old-style or malformed postcodes
|
34
|
+
|
32
35
|
case option
|
33
36
|
when :compact
|
34
37
|
nspce
|
@@ -43,106 +46,6 @@ class String
|
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
46
|
-
def clean(what)
|
47
|
-
case what
|
48
|
-
when :nhsnumber
|
49
|
-
delete('^0-9')[0..9]
|
50
|
-
when :postcode, :get_postcode
|
51
|
-
postcodeize(:db)
|
52
|
-
when :lpi
|
53
|
-
upcase.delete('^0-9A-Z')
|
54
|
-
when :gender
|
55
|
-
if self =~ /\AM(ale)?/i
|
56
|
-
'1'
|
57
|
-
elsif self =~ /\AF(emale)?/i
|
58
|
-
'2'
|
59
|
-
else
|
60
|
-
self
|
61
|
-
end
|
62
|
-
when :sex
|
63
|
-
# SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
|
64
|
-
if self =~ /^M|1/i
|
65
|
-
'1'
|
66
|
-
elsif self =~ /^F|2/i
|
67
|
-
'2'
|
68
|
-
else
|
69
|
-
'0'
|
70
|
-
end
|
71
|
-
when :sex_c
|
72
|
-
if self =~ /^M|1/i
|
73
|
-
'M'
|
74
|
-
elsif self =~ /^F|2/i
|
75
|
-
'F'
|
76
|
-
else
|
77
|
-
''
|
78
|
-
end
|
79
|
-
when :name
|
80
|
-
substitutions = {
|
81
|
-
'.' => '',
|
82
|
-
/,|;/ => ' ',
|
83
|
-
/\s{2,}/ => ' ',
|
84
|
-
'`' => '\''
|
85
|
-
}
|
86
|
-
substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
|
87
|
-
when :ethniccategory
|
88
|
-
replace_ethniccategory = {
|
89
|
-
'0' => '0',
|
90
|
-
'1' => 'M',
|
91
|
-
'2' => 'N',
|
92
|
-
'3' => 'H',
|
93
|
-
'4' => 'J',
|
94
|
-
'5' => 'K',
|
95
|
-
'6' => 'R',
|
96
|
-
'7' => '8',
|
97
|
-
'&' => 'X',
|
98
|
-
' ' => 'X',
|
99
|
-
'99' => 'X'
|
100
|
-
}
|
101
|
-
replace_ethniccategory[self] || upcase
|
102
|
-
when :code
|
103
|
-
split_on_separators.map do |code|
|
104
|
-
code.blank? ? next : code.delete('.')
|
105
|
-
end.compact.join(' ')
|
106
|
-
when :code_icd
|
107
|
-
warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
|
108
|
-
# regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
|
109
|
-
codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
|
110
|
-
cleaned_codes = []
|
111
|
-
codes.each do |code|
|
112
|
-
if code == 'D' || code == 'A'
|
113
|
-
cleaned_codes[-1] += code
|
114
|
-
else
|
115
|
-
cleaned_codes << code
|
116
|
-
end
|
117
|
-
end
|
118
|
-
cleaned_codes.join(' ')
|
119
|
-
when :icd
|
120
|
-
codes = upcase.squish.split_on_separators.reject(&:blank?)
|
121
|
-
codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
|
122
|
-
when :code_opcs
|
123
|
-
clean_code_opcs
|
124
|
-
when :hospitalnumber
|
125
|
-
self[-1..-1] =~ /\d/ ? self : self[0..-2]
|
126
|
-
when :xmlsafe, :make_xml_safe
|
127
|
-
strip_xml_unsafe_characters
|
128
|
-
when :roman5
|
129
|
-
# This deromanises roman numerals between 1 and 5
|
130
|
-
gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
|
131
|
-
when :tnmcategory
|
132
|
-
sub!(/\A[tnm]/i, '')
|
133
|
-
if self =~ /\Ax\z/i
|
134
|
-
upcase
|
135
|
-
else
|
136
|
-
downcase
|
137
|
-
end
|
138
|
-
when :strip, :upcase, :itself
|
139
|
-
# SECURE: 14-06-2017 TPG Fixed list of executable methods (whats)
|
140
|
-
send(what)
|
141
|
-
else
|
142
|
-
gsub(' ?', ' ')
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
49
|
def strip_xml_unsafe_characters
|
147
50
|
gsub(String::INVALID_CONTROL_CHARS, '')
|
148
51
|
end
|
@@ -156,14 +59,4 @@ class String
|
|
156
59
|
def split_on_separators(regexp = / |,|;/)
|
157
60
|
split(regexp)
|
158
61
|
end
|
159
|
-
|
160
|
-
private
|
161
|
-
|
162
|
-
def clean_code_opcs
|
163
|
-
split_on_separators.map do |code|
|
164
|
-
db_code = code.squash
|
165
|
-
next unless 4 == db_code.length || db_code =~ /CZ00[12]/
|
166
|
-
db_code
|
167
|
-
end.compact.join(' ')
|
168
|
-
end
|
169
62
|
end
|
data/lib/ndr_support/version.rb
CHANGED
@@ -32,6 +32,25 @@ class String
|
|
32
32
|
assert_equal 'HANTS', 'HANTS'.postcodeize(:db)
|
33
33
|
end
|
34
34
|
|
35
|
+
test 'clean_log10' do
|
36
|
+
assert_equal '0.0', '0'.clean(:log10)
|
37
|
+
assert_equal '-10.1', '-10.1'.clean(:log10)
|
38
|
+
assert_match(/\A0.041392685158225[0-9]*\z/, '1.1'.clean(:log10),
|
39
|
+
"Different ruby versions give '0.04139268515822507' or '0.04139268515822508'")
|
40
|
+
assert_equal 'BILBO', 'BILBO'.clean(:log10)
|
41
|
+
end
|
42
|
+
|
43
|
+
test 'clean_roman5' do
|
44
|
+
assert_equal '12345', 'I2345'.clean(:roman5)
|
45
|
+
assert_equal '12345', '1II345'.clean(:roman5)
|
46
|
+
assert_equal '12345', '12III45'.clean(:roman5)
|
47
|
+
assert_equal '12345', '123IIII5'.clean(:roman5)
|
48
|
+
assert_equal '12345', '123IIII5'.clean(:roman5)
|
49
|
+
assert_equal '12345', '123IV5'.clean(:roman5)
|
50
|
+
assert_equal '12345', '1234V'.clean(:roman5)
|
51
|
+
assert_equal '12345', '1II34V'.clean(:roman5)
|
52
|
+
end
|
53
|
+
|
35
54
|
test 'xml_unsafe?' do
|
36
55
|
without_control = 'hello world!'
|
37
56
|
refute without_control.xml_unsafe?
|
@@ -153,7 +172,8 @@ class String
|
|
153
172
|
end
|
154
173
|
|
155
174
|
test 'clean code_icd' do
|
156
|
-
|
175
|
+
assert_equal 'C449 Q123A Q455', 'C449,Q123,A,Q455'.clean(:code_icd)
|
176
|
+
assert_equal 'C449 Q123D Q455', 'C449,Q123,D,Q455'.clean(:code_icd)
|
157
177
|
end
|
158
178
|
|
159
179
|
test 'clean icd' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_support
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.10.1
|
4
|
+
version: 5.10.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -242,6 +242,7 @@ files:
|
|
242
242
|
- lib/ndr_support/regexp_range.rb
|
243
243
|
- lib/ndr_support/safe_file.rb
|
244
244
|
- lib/ndr_support/safe_path.rb
|
245
|
+
- lib/ndr_support/string/clean_methodable.rb
|
245
246
|
- lib/ndr_support/string/cleaning.rb
|
246
247
|
- lib/ndr_support/string/conversions.rb
|
247
248
|
- lib/ndr_support/tasks.rb
|
@@ -298,7 +299,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
298
299
|
- !ruby/object:Gem::Version
|
299
300
|
version: '0'
|
300
301
|
requirements: []
|
301
|
-
rubygems_version: 3.2.
|
302
|
+
rubygems_version: 3.2.3
|
302
303
|
signing_key:
|
303
304
|
specification_version: 4
|
304
305
|
summary: NDR Support library
|