ndr_support 5.10.1 → 5.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -1
- data/code_safety.yml +12 -8
- data/lib/ndr_support/string/clean_methodable.rb +151 -0
- data/lib/ndr_support/string/cleaning.rb +7 -114
- data/lib/ndr_support/version.rb +1 -1
- data/test/string/cleaning_test.rb +21 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f9ac10bd29787e3ff5b382618cc9389e09c1f5c1b3684a5083ca3babaa0fe99
|
4
|
+
data.tar.gz: d7b0273dee82913349ecc33fbd87310749e4190912ee5349000402e89e0e3e2b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 746812be4b4685cd841e7ba69eb56fa81e8f34d5d62fc5c4db1fbfdfe88dd06052df35429dcc927d43c6c1fe5b383768eec751d8c191d84283bb15737bb06138
|
7
|
+
data.tar.gz: 7fa1f87906e1701528f5d83268d60a664e053dd200341722c24e488eb2af4dc83ccd6d91014ecc5c893176afc71ea64510ea7e34708265b3b472432f8d2da225
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
## [Unreleased]
|
2
|
-
*
|
2
|
+
* no unreleased changes
|
3
|
+
|
4
|
+
## 5.10.3 / 2024-02-12
|
5
|
+
## Fixed
|
6
|
+
* Fixed issue with constant in wrong file/concern
|
7
|
+
* Added tests for roman5 cleaning method
|
8
|
+
* Separated log10 tests
|
9
|
+
|
10
|
+
## 5.10.2 / 2024-02-09
|
11
|
+
## Fixed
|
12
|
+
* Refactored the cleaning code to fix RuboCop issues
|
13
|
+
|
14
|
+
## Added
|
15
|
+
* Added a new 'log10' cleaning method
|
3
16
|
|
4
17
|
## 5.10.1 / 2024-01-04
|
5
18
|
## Fixed
|
data/code_safety.yml
CHANGED
@@ -22,8 +22,8 @@ file safety:
|
|
22
22
|
safe_revision: 8796a1a32273a106a6b1a134a48077cff0176659
|
23
23
|
CHANGELOG.md:
|
24
24
|
comments:
|
25
|
-
reviewed_by:
|
26
|
-
safe_revision:
|
25
|
+
reviewed_by: ollietulloch
|
26
|
+
safe_revision: 8cb498126d68a19f880b0cd1b83b127c8ec90a89
|
27
27
|
CODE_OF_CONDUCT.md:
|
28
28
|
comments:
|
29
29
|
reviewed_by: timgentry
|
@@ -136,10 +136,14 @@ file safety:
|
|
136
136
|
comments:
|
137
137
|
reviewed_by: brian.shand
|
138
138
|
safe_revision: f4f7cb0803ea34a2f1ba83495d8bcbd942786bce
|
139
|
+
lib/ndr_support/string/clean_methodable.rb:
|
140
|
+
comments:
|
141
|
+
reviewed_by: ollietulloch
|
142
|
+
safe_revision: ff21f98e6608eedb4b33f977778736b38f9fbacb
|
139
143
|
lib/ndr_support/string/cleaning.rb:
|
140
144
|
comments:
|
141
|
-
reviewed_by:
|
142
|
-
safe_revision:
|
145
|
+
reviewed_by: ollietulloch
|
146
|
+
safe_revision: d99c4bf5d2cbbfe78101beff399e7da8645417b2
|
143
147
|
lib/ndr_support/string/conversions.rb:
|
144
148
|
comments:
|
145
149
|
reviewed_by: brian.shand
|
@@ -170,8 +174,8 @@ file safety:
|
|
170
174
|
safe_revision: '09b3f97fb4c1afc88d0d070c39796b1f60c39d25'
|
171
175
|
lib/ndr_support/version.rb:
|
172
176
|
comments:
|
173
|
-
reviewed_by:
|
174
|
-
safe_revision:
|
177
|
+
reviewed_by: ollietulloch
|
178
|
+
safe_revision: 8cb498126d68a19f880b0cd1b83b127c8ec90a89
|
175
179
|
lib/ndr_support/working_days.rb:
|
176
180
|
comments:
|
177
181
|
reviewed_by: josh.pencheon
|
@@ -254,8 +258,8 @@ file safety:
|
|
254
258
|
safe_revision: b562d0c15ff1b1d565522a47e6bae47ea09706f0
|
255
259
|
test/string/cleaning_test.rb:
|
256
260
|
comments:
|
257
|
-
reviewed_by:
|
258
|
-
safe_revision:
|
261
|
+
reviewed_by: ollietulloch
|
262
|
+
safe_revision: d99c4bf5d2cbbfe78101beff399e7da8645417b2
|
259
263
|
test/string/conversions_test.rb:
|
260
264
|
comments:
|
261
265
|
reviewed_by: brian.shand
|
data/lib/ndr_support/string/clean_methodable.rb
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
# Adds the 'clean' method to String, which can be used to clean strings in various ways
|
2
|
+
# depending on the contents
|
3
|
+
module CleanMethodable
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
|
6
|
+
ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }.freeze
|
7
|
+
|
8
|
+
CLEAN_METHODS = {
|
9
|
+
nhsnumber: :clean_nhsnumber,
|
10
|
+
postcode: :clean_postcode, get_postcode: :clean_postcode,
|
11
|
+
lpi: :clean_lpi,
|
12
|
+
gender: :clean_gender, sex: :clean_sex, sex_c: :clean_sex_c,
|
13
|
+
name: :clean_name,
|
14
|
+
ethniccategory: :clean_ethniccategory,
|
15
|
+
code: :clean_code, code_icd: :clean_code_icd, icd: :clean_icd,
|
16
|
+
code_opcs: :clean_code_opcs,
|
17
|
+
hospitalnumber: :clean_hospitalnumber,
|
18
|
+
xmlsafe: :clean_xmlsafe, make_xml_safe: :clean_xmlsafe,
|
19
|
+
roman5: :clean_roman5,
|
20
|
+
tnmcategory: :clean_tnmcategory,
|
21
|
+
strip: :strip, upcase: :upcase, itself: :itself,
|
22
|
+
log10: :clean_log10
|
23
|
+
}.freeze
|
24
|
+
|
25
|
+
def clean(what)
|
26
|
+
cleaning_method = CLEAN_METHODS[what]
|
27
|
+
return send(cleaning_method) if cleaning_method
|
28
|
+
|
29
|
+
gsub(' ?', ' ')
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def clean_nhsnumber
|
35
|
+
delete('^0-9')[0..9]
|
36
|
+
end
|
37
|
+
|
38
|
+
def clean_postcode
|
39
|
+
postcodeize(:db)
|
40
|
+
end
|
41
|
+
|
42
|
+
def clean_lpi
|
43
|
+
upcase.delete('^0-9A-Z')
|
44
|
+
end
|
45
|
+
|
46
|
+
def clean_gender
|
47
|
+
return '1' if self =~ /\AM(ale)?/i
|
48
|
+
return '2' if self =~ /\AF(emale)?/i
|
49
|
+
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
def clean_sex
|
54
|
+
# SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
|
55
|
+
return '1' if self =~ /^M|1/i
|
56
|
+
return '2' if self =~ /^F|2/i
|
57
|
+
|
58
|
+
'0'
|
59
|
+
end
|
60
|
+
|
61
|
+
def clean_sex_c
|
62
|
+
return 'M' if self =~ /^M|1/i
|
63
|
+
return 'F' if self =~ /^F|2/i
|
64
|
+
|
65
|
+
''
|
66
|
+
end
|
67
|
+
|
68
|
+
def clean_name
|
69
|
+
substitutions = {
|
70
|
+
'.' => '',
|
71
|
+
/,|;/ => ' ',
|
72
|
+
/\s{2,}/ => ' ',
|
73
|
+
'`' => '\''
|
74
|
+
}
|
75
|
+
substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
|
76
|
+
end
|
77
|
+
|
78
|
+
def clean_ethniccategory
|
79
|
+
replace_ethniccategory = {
|
80
|
+
'0' => '0', '1' => 'M', '2' => 'N',
|
81
|
+
'3' => 'H', '4' => 'J', '5' => 'K',
|
82
|
+
'6' => 'R', '7' => '8', '&' => 'X',
|
83
|
+
' ' => 'X', '99' => 'X'
|
84
|
+
}
|
85
|
+
replace_ethniccategory[self] || upcase
|
86
|
+
end
|
87
|
+
|
88
|
+
def clean_code
|
89
|
+
split_on_separators.map do |code|
|
90
|
+
code.blank? ? next : code.delete('.')
|
91
|
+
end.compact.join(' ')
|
92
|
+
end
|
93
|
+
|
94
|
+
def clean_code_icd
|
95
|
+
warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
|
96
|
+
# regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
|
97
|
+
codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
|
98
|
+
cleaned_codes = []
|
99
|
+
codes.each do |code|
|
100
|
+
if %w[A D].include?(code)
|
101
|
+
cleaned_codes[-1] += code
|
102
|
+
else
|
103
|
+
cleaned_codes << code
|
104
|
+
end
|
105
|
+
end
|
106
|
+
cleaned_codes.join(' ')
|
107
|
+
end
|
108
|
+
|
109
|
+
def clean_icd
|
110
|
+
codes = upcase.squish.split_on_separators.reject(&:blank?)
|
111
|
+
codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
|
112
|
+
end
|
113
|
+
|
114
|
+
def clean_hospitalnumber
|
115
|
+
self[-1..] =~ /\d/ ? self : self[0..-2]
|
116
|
+
end
|
117
|
+
|
118
|
+
def clean_xmlsafe
|
119
|
+
strip_xml_unsafe_characters
|
120
|
+
end
|
121
|
+
|
122
|
+
def clean_roman5
|
123
|
+
# This deromanises roman numerals between 1 and 5
|
124
|
+
gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
|
125
|
+
end
|
126
|
+
|
127
|
+
def clean_tnmcategory
|
128
|
+
sub!(/\A[tnm]/i, '')
|
129
|
+
if self =~ /\Ax\z/i
|
130
|
+
upcase
|
131
|
+
else
|
132
|
+
downcase
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def clean_code_opcs
|
137
|
+
split_on_separators.map do |code|
|
138
|
+
db_code = code.squash
|
139
|
+
next unless 4 == db_code.length || db_code =~ /CZ00[12]/
|
140
|
+
|
141
|
+
db_code
|
142
|
+
end.compact.join(' ')
|
143
|
+
end
|
144
|
+
|
145
|
+
def clean_log10
|
146
|
+
f_value = Float(self, exception: false)
|
147
|
+
return self if f_value.nil? || f_value.negative?
|
148
|
+
|
149
|
+
f_value.zero? ? '0.0' : Math.log10(f_value).to_s
|
150
|
+
end
|
151
|
+
end
|
data/lib/ndr_support/string/cleaning.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
require 'active_support/core_ext/string/filters'
|
2
|
+
require 'ndr_support/string/clean_methodable'
|
2
3
|
|
4
|
+
# Extends String clean with various methods of cleaning strings
|
5
|
+
# and polishing them
|
3
6
|
class String
|
7
|
+
include CleanMethodable
|
8
|
+
|
4
9
|
INVALID_CONTROL_CHARS = /[\x00-\x08\x0b-\x0c\x0e-\x1f]/
|
5
|
-
ROMAN_ONE_TO_FIVE_MAPPING = { 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }
|
6
10
|
|
7
11
|
POSTCODE_REGEXP = /
|
8
12
|
^(
|
@@ -26,9 +30,8 @@ class String
|
|
26
30
|
# Parameter "option" can be :user, :compact, :db
|
27
31
|
def postcodeize(option = :user)
|
28
32
|
nspce = gsub(/[[:space:]]/, '').upcase
|
29
|
-
unless nspce.blank? || POSTCODE_REGEXP =~ nspce
|
30
|
-
|
31
|
-
end
|
33
|
+
return self unless nspce.blank? || POSTCODE_REGEXP =~ nspce # Don't change old-style or malformed postcodes
|
34
|
+
|
32
35
|
case option
|
33
36
|
when :compact
|
34
37
|
nspce
|
@@ -43,106 +46,6 @@ class String
|
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
46
|
-
def clean(what)
|
47
|
-
case what
|
48
|
-
when :nhsnumber
|
49
|
-
delete('^0-9')[0..9]
|
50
|
-
when :postcode, :get_postcode
|
51
|
-
postcodeize(:db)
|
52
|
-
when :lpi
|
53
|
-
upcase.delete('^0-9A-Z')
|
54
|
-
when :gender
|
55
|
-
if self =~ /\AM(ale)?/i
|
56
|
-
'1'
|
57
|
-
elsif self =~ /\AF(emale)?/i
|
58
|
-
'2'
|
59
|
-
else
|
60
|
-
self
|
61
|
-
end
|
62
|
-
when :sex
|
63
|
-
# SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
|
64
|
-
if self =~ /^M|1/i
|
65
|
-
'1'
|
66
|
-
elsif self =~ /^F|2/i
|
67
|
-
'2'
|
68
|
-
else
|
69
|
-
'0'
|
70
|
-
end
|
71
|
-
when :sex_c
|
72
|
-
if self =~ /^M|1/i
|
73
|
-
'M'
|
74
|
-
elsif self =~ /^F|2/i
|
75
|
-
'F'
|
76
|
-
else
|
77
|
-
''
|
78
|
-
end
|
79
|
-
when :name
|
80
|
-
substitutions = {
|
81
|
-
'.' => '',
|
82
|
-
/,|;/ => ' ',
|
83
|
-
/\s{2,}/ => ' ',
|
84
|
-
'`' => '\''
|
85
|
-
}
|
86
|
-
substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
|
87
|
-
when :ethniccategory
|
88
|
-
replace_ethniccategory = {
|
89
|
-
'0' => '0',
|
90
|
-
'1' => 'M',
|
91
|
-
'2' => 'N',
|
92
|
-
'3' => 'H',
|
93
|
-
'4' => 'J',
|
94
|
-
'5' => 'K',
|
95
|
-
'6' => 'R',
|
96
|
-
'7' => '8',
|
97
|
-
'&' => 'X',
|
98
|
-
' ' => 'X',
|
99
|
-
'99' => 'X'
|
100
|
-
}
|
101
|
-
replace_ethniccategory[self] || upcase
|
102
|
-
when :code
|
103
|
-
split_on_separators.map do |code|
|
104
|
-
code.blank? ? next : code.delete('.')
|
105
|
-
end.compact.join(' ')
|
106
|
-
when :code_icd
|
107
|
-
warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
|
108
|
-
# regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
|
109
|
-
codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
|
110
|
-
cleaned_codes = []
|
111
|
-
codes.each do |code|
|
112
|
-
if code == 'D' || code == 'A'
|
113
|
-
cleaned_codes[-1] += code
|
114
|
-
else
|
115
|
-
cleaned_codes << code
|
116
|
-
end
|
117
|
-
end
|
118
|
-
cleaned_codes.join(' ')
|
119
|
-
when :icd
|
120
|
-
codes = upcase.squish.split_on_separators.reject(&:blank?)
|
121
|
-
codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
|
122
|
-
when :code_opcs
|
123
|
-
clean_code_opcs
|
124
|
-
when :hospitalnumber
|
125
|
-
self[-1..-1] =~ /\d/ ? self : self[0..-2]
|
126
|
-
when :xmlsafe, :make_xml_safe
|
127
|
-
strip_xml_unsafe_characters
|
128
|
-
when :roman5
|
129
|
-
# This deromanises roman numerals between 1 and 5
|
130
|
-
gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
|
131
|
-
when :tnmcategory
|
132
|
-
sub!(/\A[tnm]/i, '')
|
133
|
-
if self =~ /\Ax\z/i
|
134
|
-
upcase
|
135
|
-
else
|
136
|
-
downcase
|
137
|
-
end
|
138
|
-
when :strip, :upcase, :itself
|
139
|
-
# SECURE: 14-06-2017 TPG Fixed list of executable methods (whats)
|
140
|
-
send(what)
|
141
|
-
else
|
142
|
-
gsub(' ?', ' ')
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
49
|
def strip_xml_unsafe_characters
|
147
50
|
gsub(String::INVALID_CONTROL_CHARS, '')
|
148
51
|
end
|
@@ -156,14 +59,4 @@ class String
|
|
156
59
|
def split_on_separators(regexp = / |,|;/)
|
157
60
|
split(regexp)
|
158
61
|
end
|
159
|
-
|
160
|
-
private
|
161
|
-
|
162
|
-
def clean_code_opcs
|
163
|
-
split_on_separators.map do |code|
|
164
|
-
db_code = code.squash
|
165
|
-
next unless 4 == db_code.length || db_code =~ /CZ00[12]/
|
166
|
-
db_code
|
167
|
-
end.compact.join(' ')
|
168
|
-
end
|
169
62
|
end
|
data/lib/ndr_support/version.rb
CHANGED
data/test/string/cleaning_test.rb
CHANGED
@@ -32,6 +32,25 @@ class String
|
|
32
32
|
assert_equal 'HANTS', 'HANTS'.postcodeize(:db)
|
33
33
|
end
|
34
34
|
|
35
|
+
test 'clean_log10' do
|
36
|
+
assert_equal '0.0', '0'.clean(:log10)
|
37
|
+
assert_equal '-10.1', '-10.1'.clean(:log10)
|
38
|
+
assert_match(/\A0.041392685158225[0-9]*\z/, '1.1'.clean(:log10),
|
39
|
+
"Different ruby versions give '0.04139268515822507' or '0.04139268515822508'")
|
40
|
+
assert_equal 'BILBO', 'BILBO'.clean(:log10)
|
41
|
+
end
|
42
|
+
|
43
|
+
test 'clean_roman5' do
|
44
|
+
assert_equal '12345', 'I2345'.clean(:roman5)
|
45
|
+
assert_equal '12345', '1II345'.clean(:roman5)
|
46
|
+
assert_equal '12345', '12III45'.clean(:roman5)
|
47
|
+
assert_equal '12345', '123IIII5'.clean(:roman5)
|
48
|
+
assert_equal '12345', '123IIII5'.clean(:roman5)
|
49
|
+
assert_equal '12345', '123IV5'.clean(:roman5)
|
50
|
+
assert_equal '12345', '1234V'.clean(:roman5)
|
51
|
+
assert_equal '12345', '1II34V'.clean(:roman5)
|
52
|
+
end
|
53
|
+
|
35
54
|
test 'xml_unsafe?' do
|
36
55
|
without_control = 'hello world!'
|
37
56
|
refute without_control.xml_unsafe?
|
@@ -153,7 +172,8 @@ class String
|
|
153
172
|
end
|
154
173
|
|
155
174
|
test 'clean code_icd' do
|
156
|
-
|
175
|
+
assert_equal 'C449 Q123A Q455', 'C449,Q123,A,Q455'.clean(:code_icd)
|
176
|
+
assert_equal 'C449 Q123D Q455', 'C449,Q123,D,Q455'.clean(:code_icd)
|
157
177
|
end
|
158
178
|
|
159
179
|
test 'clean icd' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_support
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.10.1
|
4
|
+
version: 5.10.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -242,6 +242,7 @@ files:
|
|
242
242
|
- lib/ndr_support/regexp_range.rb
|
243
243
|
- lib/ndr_support/safe_file.rb
|
244
244
|
- lib/ndr_support/safe_path.rb
|
245
|
+
- lib/ndr_support/string/clean_methodable.rb
|
245
246
|
- lib/ndr_support/string/cleaning.rb
|
246
247
|
- lib/ndr_support/string/conversions.rb
|
247
248
|
- lib/ndr_support/tasks.rb
|
@@ -298,7 +299,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
298
299
|
- !ruby/object:Gem::Version
|
299
300
|
version: '0'
|
300
301
|
requirements: []
|
301
|
-
rubygems_version: 3.2.
|
302
|
+
rubygems_version: 3.2.3
|
302
303
|
signing_key:
|
303
304
|
specification_version: 4
|
304
305
|
summary: NDR Support library
|