ndr_import 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +69 -0
  11. data/Rakefile +13 -0
  12. data/code_safety.yml +374 -0
  13. data/gemfiles/Gemfile.rails32 +5 -0
  14. data/gemfiles/Gemfile.rails32.lock +142 -0
  15. data/gemfiles/Gemfile.rails41 +5 -0
  16. data/gemfiles/Gemfile.rails41.lock +145 -0
  17. data/gemfiles/Gemfile.rails42 +5 -0
  18. data/gemfiles/Gemfile.rails42.lock +145 -0
  19. data/lib/ndr_import.rb +13 -0
  20. data/lib/ndr_import/csv_library.rb +40 -0
  21. data/lib/ndr_import/file/all.rb +8 -0
  22. data/lib/ndr_import/file/base.rb +76 -0
  23. data/lib/ndr_import/file/delimited.rb +86 -0
  24. data/lib/ndr_import/file/excel.rb +131 -0
  25. data/lib/ndr_import/file/pdf.rb +38 -0
  26. data/lib/ndr_import/file/registry.rb +50 -0
  27. data/lib/ndr_import/file/text.rb +52 -0
  28. data/lib/ndr_import/file/word.rb +30 -0
  29. data/lib/ndr_import/file/zip.rb +67 -0
  30. data/lib/ndr_import/helpers/file/delimited.rb +105 -0
  31. data/lib/ndr_import/helpers/file/excel.rb +181 -0
  32. data/lib/ndr_import/helpers/file/pdf.rb +29 -0
  33. data/lib/ndr_import/helpers/file/word.rb +27 -0
  34. data/lib/ndr_import/helpers/file/xml.rb +45 -0
  35. data/lib/ndr_import/helpers/file/zip.rb +44 -0
  36. data/lib/ndr_import/mapper.rb +220 -0
  37. data/lib/ndr_import/mapping_error.rb +5 -0
  38. data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
  39. data/lib/ndr_import/non_tabular/line.rb +46 -0
  40. data/lib/ndr_import/non_tabular/mapping.rb +35 -0
  41. data/lib/ndr_import/non_tabular/record.rb +99 -0
  42. data/lib/ndr_import/non_tabular/table.rb +193 -0
  43. data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
  44. data/lib/ndr_import/standard_mappings.rb +23 -0
  45. data/lib/ndr_import/table.rb +179 -0
  46. data/lib/ndr_import/version.rb +4 -0
  47. data/ndr_import.gemspec +44 -0
  48. data/test/file/base_test.rb +54 -0
  49. data/test/file/delimited_test.rb +143 -0
  50. data/test/file/excel_test.rb +85 -0
  51. data/test/file/pdf_test.rb +35 -0
  52. data/test/file/registry_test.rb +60 -0
  53. data/test/file/text_test.rb +92 -0
  54. data/test/file/word_test.rb +35 -0
  55. data/test/file/zip_test.rb +47 -0
  56. data/test/helpers/file/delimited_test.rb +113 -0
  57. data/test/helpers/file/excel_test.rb +97 -0
  58. data/test/helpers/file/pdf_test.rb +26 -0
  59. data/test/helpers/file/word_test.rb +26 -0
  60. data/test/helpers/file/xml_test.rb +131 -0
  61. data/test/helpers/file/zip_test.rb +75 -0
  62. data/test/mapper_test.rb +551 -0
  63. data/test/non_tabular/mapping_test.rb +36 -0
  64. data/test/non_tabular/table_test.rb +510 -0
  65. data/test/non_tabular_file_helper_test.rb +501 -0
  66. data/test/readme_test.rb +53 -0
  67. data/test/resources/bomd.csv +3 -0
  68. data/test/resources/broken.csv +3 -0
  69. data/test/resources/filesystem_paths.yml +26 -0
  70. data/test/resources/flat_file.pdf +0 -0
  71. data/test/resources/flat_file.txt +27 -0
  72. data/test/resources/flat_file.yml +20 -0
  73. data/test/resources/hello_utf16be.txt +0 -0
  74. data/test/resources/hello_utf16le.txt +0 -0
  75. data/test/resources/hello_utf8.txt +2 -0
  76. data/test/resources/hello_windows.txt +2 -0
  77. data/test/resources/hello_world.doc +0 -0
  78. data/test/resources/hello_world.pdf +0 -0
  79. data/test/resources/hello_world.txt +2 -0
  80. data/test/resources/high_ascii_delimited.txt +2 -0
  81. data/test/resources/malformed.xml +6 -0
  82. data/test/resources/normal.csv +3 -0
  83. data/test/resources/normal.csv.zip +0 -0
  84. data/test/resources/normal_pipe.csv +3 -0
  85. data/test/resources/normal_thorn.csv +3 -0
  86. data/test/resources/not_a_pdf.pdf +0 -0
  87. data/test/resources/not_a_word_file.doc +0 -0
  88. data/test/resources/sample_xls.xls +0 -0
  89. data/test/resources/sample_xlsx.xlsx +0 -0
  90. data/test/resources/standard_mappings.yml +39 -0
  91. data/test/resources/txt_file_xls_extension.xls +1 -0
  92. data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
  93. data/test/resources/utf-16be_xml.xml +0 -0
  94. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  95. data/test/resources/utf-16le_xml.xml +0 -0
  96. data/test/resources/utf-8_xml.xml +9 -0
  97. data/test/resources/windows-1252_xml.xml +9 -0
  98. data/test/resources/windows.csv +5 -0
  99. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  100. data/test/standard_mappings_test.rb +22 -0
  101. data/test/table_test.rb +288 -0
  102. data/test/test_helper.rb +13 -0
  103. metadata +443 -0
@@ -0,0 +1,97 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/excel'
3
+
4
+ # Excel file helper tests
5
# Tests for the NdrImport::Helpers::File::Excel mixin.
#
# Assertions follow Minitest's `assert_equal expected, actual` argument order so
# that failure messages report the expected and actual values the right way round.
class ExcelTest < ActiveSupport::TestCase
  # Minimal host class: includes the mixin so its helpers can be reached via #send.
  class TestImporter
    include NdrImport::Helpers::File::Excel
  end

  def setup
    @permanenttestfiles = SafePath.new('permanent_test_files')
    @importer = TestImporter.new
  end

  test 'read_excel_file helper should read xls file' do
    file_content = @importer.send(:read_excel_file, @permanenttestfiles.join('sample_xls.xls'))
    assert_equal 2, file_content.count
    assert_equal %w(1A 1B), file_content.first
  end

  test 'read_excel_file helper should read xlsx file' do
    file_content = @importer.send(:read_excel_file, @permanenttestfiles.join('sample_xlsx.xlsx'))
    assert_equal 2, file_content.count
    assert_equal %w(1A 1B), file_content.first
  end

  test 'read_excel_file helper should read xlsx file with the incorrect xls extension' do
    file_path = @permanenttestfiles.join('xlsx_file_xls_extension.xls')
    file_content = @importer.send(:read_excel_file, file_path)
    assert_equal 2, file_content.count
    assert_equal %w(1A 1B), file_content.first

    # NOTE(review): presumably the helper leaves an "_amend.xlsx" copy behind when
    # it re-reads a mis-named file; the test removes it. Confirm against the helper.
    SafeFile.delete @permanenttestfiles.join('xlsx_file_xls_extension_amend.xlsx')
  end

  test 'read_excel_file helper should handle exceptions' do
    # txt file
    SafeFile.open(@permanenttestfiles.join('temp.txt'), 'w') { |f| f.write 'dummy line' }
    assert_raises RuntimeError do
      @importer.send(:read_excel_file, @permanenttestfiles.join('temp.txt'))
    end

    # .txt file in .xls extension
    File.rename @permanenttestfiles.join('temp.txt'), @permanenttestfiles.join('temp.xls')
    assert_raises RuntimeError do
      @importer.send(:read_excel_file, @permanenttestfiles.join('temp.xls'))
    end

    # .txt file in .xlsx extension
    File.rename @permanenttestfiles.join('temp.xls'), @permanenttestfiles.join('temp.xlsx')
    assert_raises RuntimeError do
      @importer.send(:read_excel_file, @permanenttestfiles.join('temp.xlsx'))
    end

    # Remove the scratch file and the "_amend" file the test expects to exist by now.
    SafeFile.delete @permanenttestfiles.join('temp.xlsx')
    SafeFile.delete @permanenttestfiles.join('temp_amend.xlsx')
  end

  test 'excel_tables helper should read xls table correctly' do
    table = @importer.send(:excel_tables, @permanenttestfiles.join('sample_xls.xls'))
    table.each do |tablename, sheet|
      assert_equal 'Sheet1', tablename
      assert_equal %w(1A 1B), sheet.first
    end
  end

  test 'excel_tables helper should read xlsx table correctly' do
    table = @importer.send(:excel_tables, @permanenttestfiles.join('sample_xlsx.xlsx'))
    table.each do |tablename, sheet|
      assert_equal 'Sheet1', tablename
      assert_equal %w(1A 1B), sheet.first
    end
  end

  # Only defined for gem versions below 3.0.0: checks the deprecation warning
  # written to $stderr, which is swapped for a StringIO and restored afterwards.
  def test_each_excel_table_should_be_deprecated
    original_stderr = $stderr
    $stderr = StringIO.new

    table = @importer.send(:each_excel_table, @permanenttestfiles.join('sample_xlsx.xlsx'))
    table.each do |tablename, sheet|
      assert_equal 'Sheet1', tablename
      assert_equal %w(1A 1B), sheet.first
    end

    assert_match(/\A\[warning\] each_excel_table will be deprecated/, $stderr.string)
  ensure
    $stderr = original_stderr
  end if Gem::Requirement.new('< 3.0.0').satisfied_by?(Gem::Version.new(NdrImport::VERSION))

  # Only defined from gem version 3.0.0 onwards: the deprecated helpers must be gone.
  def test_deprecated_methods_removed_in_v3
    refute @importer.protected_methods.include?(:each_excel_table), 'should be removed in v3.0.0'
    refute @importer.private_methods.include?(:each_excel_row), 'should be removed in v3.0.0'
    refute @importer.private_methods.include?(:each_xls_row), 'should be removed in v3.0.0'
    refute @importer.private_methods.include?(:each_xlsx_row), 'should be removed in v3.0.0'
  end if Gem::Requirement.new('>= 3.0.0').satisfied_by?(Gem::Version.new(NdrImport::VERSION))
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/pdf'
3
+
4
+ # PDF file helper tests
5
# Tests for the NdrImport::Helpers::File::Pdf mixin.
class PdfTest < ActiveSupport::TestCase
  # Minimal host class: includes the mixin so its helpers can be reached via #send.
  class TestImporter
    include NdrImport::Helpers::File::Pdf
  end

  def setup
    @permanent_test_files = SafePath.new('permanent_test_files')
    @importer = TestImporter.new
  end

  test 'read_pdf_file helper should read pdf file' do
    file_content = @importer.send(:read_pdf_file, @permanent_test_files.join('hello_world.pdf'))
    # Expected value first, so a failure reports expected/actual the right way round.
    assert_equal ['Hello World'], file_content
  end

  test 'read_pdf_file helper should raise exception on invalid pdf file' do
    assert_raises RuntimeError do
      @importer.send(:read_pdf_file, @permanent_test_files.join('not_a_pdf.pdf'))
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/word'
3
+
4
+ # Word file helper tests
5
# Tests for the NdrImport::Helpers::File::Word mixin.
class WordTest < ActiveSupport::TestCase
  # Minimal host class: includes the mixin so its helpers can be reached via #send.
  class TestImporter
    include NdrImport::Helpers::File::Word
  end

  def setup
    @permanent_test_files = SafePath.new('permanent_test_files')
    @importer = TestImporter.new
  end

  test 'read_word_file helper should read word file' do
    file_content = @importer.send(:read_word_file, @permanent_test_files.join('hello_world.doc'))
    # Expected value first, so a failure reports expected/actual the right way round.
    assert_equal ['Hello world, this is a word document'], file_content
  end

  test 'read_word_file helper should raise exception on invalid word file' do
    assert_raises RuntimeError do
      @importer.send(:read_word_file, @permanent_test_files.join('not_a_word_file.doc'))
    end
  end
end
@@ -0,0 +1,131 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/xml'
3
+ require 'nokogiri'
4
+
5
+ # XML file helper tests
6
# Tests for the NdrImport::Helpers::File::Xml mixin (read_xml_file).
#
# The per-encoding tests share their assertions through #assert_utf8_letters, and
# the tree-walking test uses #non_blank_children instead of repeating the same
# `children.reject` chain for every level.
class XmlTest < ActiveSupport::TestCase
  # Minimal host class: includes the mixin so its helpers can be reached via #send.
  class TestImporter
    include NdrImport::Helpers::File::Xml
  end

  def setup
    @home = SafePath.new('test_space_rw')
    @permanent_test_files = SafePath.new('permanent_test_files')
    @importer = TestImporter.new
  end

  test 'import_xml_file should handle incoming UTF-8' do
    doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-8_xml.xml'))

    assert_equal 'UTF-8', doc.encoding
    assert_utf8_letters doc, [206, 177, 206, 178] # 2 bytes each for alpha and beta
  end

  test 'import_xml_file should handle incoming UTF-16 (big endian)' do
    doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-16be_xml.xml'))

    assert_equal 'UTF-8', doc.encoding
    assert_utf8_letters doc, [206, 177, 206, 178] # 2 bytes each for alpha and beta
  end

  test 'import_xml_file should handle incoming UTF-16 (little endian)' do
    doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-16le_xml.xml'))

    assert_equal 'UTF-8', doc.encoding
    assert_utf8_letters doc, [206, 177, 206, 178] # 2 bytes each for alpha and beta
  end

  test 'import_xml_file should handle incoming UTF-16 with declaration' do
    doc = @importer.send(:read_xml_file,
                         @permanent_test_files.join('utf-16be_xml_with_declaration.xml'))

    assert_utf8_letters doc, [206, 177, 206, 178] # 2 bytes each for alpha and beta

    # The document should be UTF-8, and we shouldn't
    # get encoding mismatches when interrogating it:
    assert_equal 'UTF-8', doc.encoding
    assert_equal 1, doc.css('note[id=alpha]').length
  end

  test 'import_xml_file should handle incoming Windows-1252' do
    doc = @importer.send(:read_xml_file, @permanent_test_files.join('windows-1252_xml.xml'))

    assert_equal 'UTF-8', doc.encoding
    # 3 bytes each for apostrophe and dash
    assert_utf8_letters doc, [226, 128, 153, 226, 128, 147]
  end

  test 'import_xml_file with malformed XML file' do
    assert_raises Nokogiri::XML::SyntaxError do
      @importer.send(:read_xml_file, @permanent_test_files.join('malformed.xml'))
    end
  end

  test '.import_xml_file should reject non safe path arguments' do
    assert_raises ArgumentError do
      @importer.send(:read_xml_file, @home.join('simple.xml').to_s)
    end
  end

  test '.import_xml_file should accept safepath' do
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.root do
        xml.note(:id => 1) do
          xml.time 'Thu Dec 13 13:12:00 UTC 2012'
          xml.title 'Note 1'
          xml.body 'Note 1 body blabla bla'
        end
        xml.note(:id => 2) do
          xml.time 'Thu Dec 14 12:11:00 UTC 2012'
          xml.title 'note 2'
          xml.body 'note 2 body blablabala'
        end
      end
    end
    SafeFile.open(@home.join('simple.xml'), 'w') { |f| f.write builder.to_xml }

    doc = @importer.send(:read_xml_file, @home.join('simple.xml'))

    # Walk document -> <root> -> first <note> -> its first child, skipping
    # whitespace-only nodes at every level.
    roots = non_blank_children(doc)
    assert_equal 1, roots.length
    assert_equal 'root', roots[0].name

    notes = non_blank_children(roots[0])
    assert_equal 2, notes.length
    assert_equal 'note', notes[0].name

    note_fields = non_blank_children(notes[0])
    assert_equal 3, note_fields.length
    assert_equal 'Thu Dec 13 13:12:00 UTC 2012', note_fields[0].text

    SafeFile.delete @home.join('simple.xml')
  end

  private

  # Joins the text of every <letter> element in +doc+ and asserts that it is a
  # valid UTF-8 string of exactly two characters whose bytes are +expected_bytes+.
  def assert_utf8_letters(doc, expected_bytes)
    letters = doc.xpath('//letter').map(&:text).join

    assert letters.valid_encoding?
    assert_equal Encoding.find('UTF-8'), letters.encoding
    assert_equal 2, letters.chars.to_a.length
    assert_equal expected_bytes, letters.bytes.to_a
  end

  # Returns the children of +node+, dropping those whose text is only a newline
  # followed by optional spaces (indentation between elements).
  def non_blank_children(node)
    node.children.reject { |c| c.text =~ /\A\n *\Z/ }
  end
end
@@ -0,0 +1,75 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/zip'
3
+ require 'zip'
4
+
5
+ # Zip file helper tests
6
# Tests for the NdrImport::Helpers::File::Zip mixin (unzip_file).
class ZipTest < ActiveSupport::TestCase
  # Minimal host class: includes the mixin so its helpers can be reached via #send.
  class TestImporter
    include NdrImport::Helpers::File::Zip
  end

  def setup
    @home = SafePath.new('test_space_rw')
    @permanent_test_files = SafePath.new('permanent_test_files')
    @importer = TestImporter.new
  end

  test '.unzip should reject non SafePath arguments' do
    zip = @home.join('imaginary.zip')

    # Every combination in which at least one argument is a plain String.
    string_argument_pairs = [
      [zip.to_s, @home.to_s],
      [zip.to_s, @home],
      [zip, @home.to_s]
    ]

    string_argument_pairs.each do |source, destination|
      assert_raises(ArgumentError) { @importer.send(:unzip_file, source, destination) }
    end
  end

  test '.unzip unzip zip file' do
    archive = @home.join('test.zip')
    members = %w(f1 f2 f3).map { |name| @home.join(name) }

    # Create the member files, zip them up, then remove the originals.
    members.each do |member|
      File.open(member, 'w') { |f| f.write "test #{member}" }
    end

    ::Zip::File.open(archive, Zip::File::CREATE) do |zipfile|
      members.each { |member| zipfile.add(File.basename(member.to_s), member.to_s) }
    end

    File.delete(*members)
    members.each { |member| refute File.exist?(member) }

    assert File.exist?(archive)
    dest = @home.join('unziped')

    @importer.send(:unzip_file, archive, dest)

    # Each member must reappear under the destination directory.
    extracted = members.map { |member| dest.join(File.basename(member)) }
    extracted.each { |path| assert File.exist?(path) }

    # Tidy up everything the test created.
    extracted.each { |path| File.delete(path) }
    File.delete(archive)
    FileUtils.rm_r(dest)
  end
end
@@ -0,0 +1,551 @@
1
+ require 'test_helper'
2
+
3
+ # expose private methods
4
# Host class for exercising NdrImport::Mapper in tests: it includes the mixin and
# re-declares the methods below as public so they can be called directly.
class TestMapper
  include NdrImport::Mapper

  public :fixed_width_columns
  public :mapped_line
  public :mapped_value
  public :replace_before_mapping

  # TODO: test fixed_width_columns
end
11
+
12
+ # This tests the NdrImport::Mapper module (via the TestMapper host class)
13
+ class MapperTest < ActiveSupport::TestCase
14
+ def setup
15
+ @permanent_test_files = SafePath.new('permanent_test_files')
16
+ end
17
+
18
+ format_mapping = { 'format' => 'dd/mm/yyyy' }
19
+ format_mapping_yyyymmdd = { 'format' => 'yyyymmdd' }
20
+ clean_name_mapping = { 'clean' => :name }
21
+ clean_ethniccategory_mapping = { 'clean' => :ethniccategory }
22
+ clean_icd_mapping = { 'clean' => :code_icd }
23
+ clean_opcs_mapping = { 'clean' => :code_opcs }
24
+ map_mapping = { 'map' => { 'A' => '1' } }
25
+ replace_mapping = { 'replace' => { '.0' => '' } }
26
+ daysafter_mapping = { 'daysafter' => '2012-05-16' }
27
+ # TODO: match_mapping = {}
28
+
29
+ simple_mapping = [{ 'column' => 'patient address', 'mappings' => ['field' => 'address'] }]
30
+
31
+ simple_mapping_with_clean_opcs = YAML.load <<-YML
32
+ - column: primaryprocedures
33
+ mappings:
34
+ - field: primaryprocedures
35
+ clean: :code_opcs
36
+ YML
37
+
38
+ join_mapping = YAML.load <<-YML
39
+ - column: forename1
40
+ mappings:
41
+ - field: forenames
42
+ order: 1
43
+ join: " "
44
+ - column: forename2
45
+ mappings:
46
+ - field: forenames
47
+ order: 2
48
+ YML
49
+
50
+ join_compact_mapping = YAML.load <<-YML
51
+ - column: forename1
52
+ mappings:
53
+ - field: forenames
54
+ order: 1
55
+ join: " "
56
+ compact: false
57
+ - column: forename2
58
+ mappings:
59
+ - field: forenames
60
+ order: 2
61
+ YML
62
+
63
+ unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }]
64
+
65
+ cross_populate_mapping = YAML.load <<-YML
66
+ - column: referringclinicianname
67
+ mappings:
68
+ - field: consultantname
69
+ - field: consultantcode
70
+ priority: 2
71
+ - column: referringcliniciancode
72
+ mappings:
73
+ - field: consultantcode
74
+ YML
75
+
76
+ cross_populate_replace_mapping = YAML.load <<-YML
77
+ - column: referringclinicianname
78
+ mappings:
79
+ - field: consultantname
80
+ - field: consultantcode
81
+ priority: 2
82
+ replace:
83
+ ? !ruby/regexp /^BOB FOSSIL$/i
84
+ : "ROBERT FOSSIL"
85
+ - column: referringcliniciancode
86
+ mappings:
87
+ - field: consultantcode
88
+ priority: 1
89
+ YML
90
+
91
+ cross_populate_map_mapping = YAML.load <<-YML
92
+ - column: referringclinicianname
93
+ mappings:
94
+ - field: consultantname
95
+ - field: consultantcode
96
+ priority: 2
97
+ map:
98
+ "Bob Fossil": "C5678"
99
+ - column: referringcliniciancode
100
+ mappings:
101
+ - field: consultantcode
102
+ priority: 1
103
+ YML
104
+
105
+ cross_populate_order_mapping = YAML.load <<-YML
106
+ - column: referringclinicianname
107
+ mappings:
108
+ - field: consultantname
109
+ - field: consultantcode
110
+ priority: 2
111
+ - column: referringcliniciancode
112
+ mappings:
113
+ - field: consultantcode
114
+ priority: 1
115
+ - column: somecolumn
116
+ mappings:
117
+ - field: consultantcode
118
+ priority: 5
119
+ - column: anothercolumn
120
+ mappings:
121
+ - field: consultantcode
122
+ priority: 10
123
+ YML
124
+
125
+ cross_populate_no_priority = YAML.load <<-YML
126
+ - column: columnoneraw
127
+ mappings:
128
+ - field: columnone
129
+ - field: columntwo
130
+ - column: columntworaw
131
+ mappings:
132
+ - field: columntwo
133
+ priority: 5
134
+ YML
135
+
136
+ standard_mapping_without = YAML.load <<-YML
137
+ - column: surname
138
+ rawtext_name: surname
139
+ mappings:
140
+ - field: surname
141
+ clean: :name
142
+ - column: forename
143
+ rawtext_name: forenames
144
+ mappings:
145
+ - field: forenames
146
+ clean: :name
147
+ - column: sex
148
+ rawtext_name: sex
149
+ mappings:
150
+ - field: sex
151
+ clean: :sex
152
+ - column: nhs_no
153
+ rawtext_name: nhsnumber
154
+ mappings:
155
+ - field: nhsnumber
156
+ clean: :nhsnumber
157
+ YML
158
+
159
+ standard_mapping_with = YAML.load <<-YML
160
+ - standard_mapping: surname
161
+ - column: forename
162
+ standard_mapping: forenames
163
+ - standard_mapping: sex
164
+ - column: nhs_no
165
+ standard_mapping: nhsnumber
166
+ YML
167
+
168
+ standard_mapping_merge = YAML.load <<-YML
169
+ - column: surname
170
+ standard_mapping: surname
171
+ mappings:
172
+ - field: surname2
173
+ YML
174
+
175
+ standard_mapping_column = YAML.load <<-YML
176
+ - column: overriding_column_name
177
+ standard_mapping: test
178
+ YML
179
+
180
+ invalid_priorities = YAML.load <<-YML
181
+ - column: columnoneraw
182
+ mappings:
183
+ - field: columnone
184
+ - field: columntwo
185
+ priority: 5
186
+ - column: columntworaw
187
+ mappings:
188
+ - field: columntwo
189
+ priority: 5
190
+ YML
191
+
192
+ invalid_standard_mapping = YAML.load <<-YML
193
+ - column: surname
194
+ standard_mapping: surnames
195
+ YML
196
+
197
+ joined_mapping_blank_start = YAML.load <<-YML
198
+ - column: addressoneraw
199
+ mappings:
200
+ - field: address
201
+ join: ","
202
+ order: 1
203
+ - column: postcode
204
+ mappings:
205
+ - field: address
206
+ order: 2
207
+ YML
208
+
209
+ joined_mapping_blank_start_uncompacted = YAML.load <<-YML
210
+ - column: addressoneraw
211
+ mappings:
212
+ - field: address
213
+ join: ","
214
+ order: 1
215
+ compact: false
216
+ - column: postcode
217
+ mappings:
218
+ - field: address
219
+ order: 2
220
+ YML
221
+
222
+ date_mapping = YAML.load <<-YML
223
+ - column: birth_date
224
+ rawtext_name: dateofbirth
225
+ mappings:
226
+ - field: dateofbirth
227
+ format: dd/mm/yyyy
228
+ - column: received_date
229
+ rawtext_name: receiveddate
230
+ mappings:
231
+ - field: receiveddate
232
+ format: yyyymmdd
233
+ - column: american_date
234
+ rawtext_name: americandate
235
+ mappings:
236
+ - field: americandate
237
+ format: mm/dd/yyyy
238
+ - column: short_date
239
+ rawtext_name: shortdate
240
+ mappings:
241
+ - field: shortdate
242
+ format: dd/mm/yy
243
+ - column: funky_date
244
+ rawtext_name: funkydate
245
+ mappings:
246
+ - field: funkydate
247
+ format: dd/mmm/yy
248
+ YML
249
+
250
+ do_not_capture_column = YAML.load <<-YML
251
+ - column: ignore_me
252
+ do_not_capture: true
253
+ YML
254
+
255
+ base64_mapping = YAML.load <<-YML
256
+ - column: base64
257
+ decode:
258
+ - :base64
259
+ - :word_doc
260
+ YML
261
+
262
+ invalid_decode_mapping = YAML.load <<-YML
263
+ - column: column_name
264
+ decode:
265
+ - :invalid_encoding
266
+ YML
267
+
268
+ test 'map should return a number' do
269
+ assert_equal '1', TestMapper.new.mapped_value('A', map_mapping)
270
+ end
271
+
272
+ test 'map should return nil' do
273
+ assert_nil TestMapper.new.mapped_value('B', map_mapping)
274
+ end
275
+
276
+ test 'map should return correct date format' do
277
+ assert_equal Date.new(2011, 1, 25), TestMapper.new.mapped_value('25/01/2011', format_mapping)
278
+ assert_equal Date.new(2011, 1, 25),
279
+ TestMapper.new.mapped_value('20110125', format_mapping_yyyymmdd)
280
+ end
281
+
282
+ test 'map should return incorrect date format' do
283
+ assert_not_equal Date.new(2011, 3, 4),
284
+ TestMapper.new.mapped_value('03/04/2011', format_mapping)
285
+ end
286
+
287
+ test 'map should return nil date format' do
288
+ assert_nil TestMapper.new.mapped_value('03/25/2011', format_mapping)
289
+ end
290
+
291
+ test 'map should replace value' do
292
+ value = '2.0'
293
+ TestMapper.new.replace_before_mapping(value, replace_mapping)
294
+ assert_equal '2', value
295
+ end
296
+
297
+ test 'map should not alter value' do
298
+ value = '2.1'
299
+ TestMapper.new.replace_before_mapping(value, replace_mapping)
300
+ assert_equal '2.1', value
301
+ end
302
+
303
+ test 'map should clean name' do
304
+ assert_equal 'ANNABELLE SMITH',
305
+ TestMapper.new.mapped_value('anna.belle,smith', clean_name_mapping)
306
+ end
307
+
308
+ test 'map should clean ethenic category' do
309
+ assert_equal 'M', TestMapper.new.mapped_value('1', clean_ethniccategory_mapping)
310
+ assert_equal 'X', TestMapper.new.mapped_value('99', clean_ethniccategory_mapping)
311
+ assert_equal 'A', TestMapper.new.mapped_value('A', clean_ethniccategory_mapping)
312
+ assert_equal 'INVALID', TestMapper.new.mapped_value('InValiD', clean_ethniccategory_mapping)
313
+ end
314
+
315
+ test 'map should clean icd code' do
316
+ assert_equal 'C34.3 R93.2 Z51.5',
317
+ TestMapper.new.mapped_value('C34.3,R93.2,Z51.5', clean_icd_mapping)
318
+ end
319
+
320
+ test 'map should clean opcs code' do
321
+ assert_equal 'U212 Y973', TestMapper.new.mapped_value('U212,Y973,X1', clean_opcs_mapping)
322
+ assert_equal '', TestMapper.new.mapped_value('98', clean_opcs_mapping)
323
+ assert_equal '', TestMapper.new.mapped_value('TooLong', clean_opcs_mapping)
324
+ assert_equal nil, TestMapper.new.mapped_value('', clean_opcs_mapping)
325
+ assert_equal 'ABCD', TestMapper.new.mapped_value('AbcD', clean_opcs_mapping)
326
+ assert_equal '1234', TestMapper.new.mapped_value('1234', clean_opcs_mapping)
327
+ end
328
+
329
+ test 'should return correct date format for date fields with daysafter' do
330
+ assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value(2, daysafter_mapping)
331
+ assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value('2', daysafter_mapping)
332
+ assert_equal Date.new(2012, 5, 14), TestMapper.new.mapped_value(-2, daysafter_mapping)
333
+ assert_equal Date.new(2012, 5, 14), TestMapper.new.mapped_value('-2', daysafter_mapping)
334
+ assert_equal Date.new(2012, 5, 16), TestMapper.new.mapped_value(0, daysafter_mapping)
335
+ assert_equal 'String', TestMapper.new.mapped_value('String', daysafter_mapping)
336
+ assert_equal '', TestMapper.new.mapped_value('', daysafter_mapping)
337
+ assert_nil TestMapper.new.mapped_value(nil, daysafter_mapping)
338
+ assert_equal Date.new(2057, 8, 23), TestMapper.new.mapped_value(16_535, daysafter_mapping)
339
+ # Answer independently checked http://www.wolframalpha.com/input/?i=2012-05-16+%2B+9379+days
340
+ assert_equal Date.new(2038, 1, 19), TestMapper.new.mapped_value(9379, daysafter_mapping)
341
+ assert_equal Date.new(1946, 5, 11),
342
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '1900-02-01')
343
+ assert_equal Date.new(2014, 4, 8),
344
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '1967-12-31')
345
+ assert_equal Date.new(2046, 4, 9),
346
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '2000-01-01')
347
+ end
348
+
349
+ test 'line mapping should create valid hash' do
350
+ line_hash = TestMapper.new.mapped_line(['1 test road, testtown'], simple_mapping)
351
+ assert_equal '1 test road, testtown', line_hash['address']
352
+ assert_equal '1 test road, testtown', line_hash[:rawtext]['patient address']
353
+ end
354
+
355
+ test 'line mapping should create valid hash with blank cleaned value' do
356
+ assert_equal '', TestMapper.new.mapped_value('98', clean_opcs_mapping)
357
+ line_hash = TestMapper.new.mapped_line(['98'], simple_mapping_with_clean_opcs)
358
+ assert_equal nil, line_hash['primaryprocedures']
359
+ assert_equal '98', line_hash[:rawtext]['primaryprocedures']
360
+ end
361
+
362
+ test 'line mapping should create valid hash with join' do
363
+ line_hash = TestMapper.new.mapped_line(%w(Catherine Elizabeth), join_mapping)
364
+ assert_equal 'Catherine Elizabeth', line_hash['forenames']
365
+ assert_equal 'Catherine', line_hash[:rawtext]['forename1']
366
+ assert_equal 'Elizabeth', line_hash[:rawtext]['forename2']
367
+ end
368
+
369
+ test 'line mapping should create valid hash with rawtext only' do
370
+ line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping)
371
+ assert_equal 1, line_hash.length
372
+ assert_equal 'otherinfo', line_hash[:rawtext]['extra']
373
+ end
374
+
375
+ test 'should create valid hash with unused cross populate' do
376
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', 'C1234'], cross_populate_mapping)
377
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
378
+ assert_equal 'C1234', line_hash[:rawtext]['referringcliniciancode']
379
+
380
+ assert_equal 'Bob Fossil', line_hash['consultantname']
381
+ assert_equal 'C1234', line_hash['consultantcode']
382
+ end
383
+
384
+ test 'should create valid hash with used cross populate' do
385
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_mapping)
386
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
387
+ assert_equal '', line_hash[:rawtext]['referringcliniciancode']
388
+
389
+ assert_equal 'Bob Fossil', line_hash['consultantname']
390
+ assert_equal 'Bob Fossil', line_hash['consultantcode']
391
+ end
392
+
393
+ test 'should create valid hash with unused cross populate replace' do
394
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', 'C1234'], cross_populate_replace_mapping)
395
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
396
+ assert_equal 'C1234', line_hash[:rawtext]['referringcliniciancode']
397
+
398
+ assert_equal 'Bob Fossil', line_hash['consultantname']
399
+ assert_equal 'C1234', line_hash['consultantcode']
400
+ end
401
+
402
+ test 'should create valid hash with used cross populate with replace' do
403
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_replace_mapping)
404
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
405
+ assert_equal '', line_hash[:rawtext]['referringcliniciancode']
406
+
407
+ assert_equal 'Bob Fossil', line_hash['consultantname']
408
+ assert_equal 'ROBERT FOSSIL', line_hash['consultantcode']
409
+ end
410
+
411
# Blank code column with the name 'Bob Smith': consultantcode is expected to
# receive the name unchanged.
test 'should create valid hash with used cross populate without replace' do
  mapped = TestMapper.new.mapped_line(['Bob Smith', ''], cross_populate_replace_mapping)

  { 'referringclinicianname' => 'Bob Smith',
    'referringcliniciancode' => '' }.each do |column, expected|
    assert_equal expected, mapped[:rawtext][column]
  end

  { 'consultantname' => 'Bob Smith',
    'consultantcode' => 'Bob Smith' }.each do |field, expected|
    assert_equal expected, mapped[field]
  end
end
419
+
420
# Both columns are supplied, so the map mapping is expected to leave the
# supplied code in consultantcode.
test 'should create valid hash with unused cross populate map' do
  raw_line = ['Bob Fossil', 'C1234']
  mapped = TestMapper.new.mapped_line(raw_line, cross_populate_map_mapping)
  rawtext = mapped[:rawtext]

  assert_equal 'Bob Fossil', rawtext['referringclinicianname']
  assert_equal 'C1234', rawtext['referringcliniciancode']

  assert_equal 'Bob Fossil', mapped['consultantname']
  assert_equal 'C1234', mapped['consultantcode']
end
428
+
429
# Blank code column with the name 'Bob Fossil': consultantcode is expected to
# come out as 'C5678'.
test 'should create valid hash with used cross populate with map' do
  mapped = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_map_mapping)

  { 'referringclinicianname' => 'Bob Fossil',
    'referringcliniciancode' => '' }.each do |column, expected|
    assert_equal expected, mapped[:rawtext][column]
  end

  { 'consultantname' => 'Bob Fossil',
    'consultantcode' => 'C5678' }.each do |field, expected|
    assert_equal expected, mapped[field]
  end
end
437
+
438
# Blank code column with the name 'Bob Smith': consultantcode is expected to
# end up nil.
test 'should create valid hash with used cross populate without map' do
  raw_line = ['Bob Smith', '']
  mapped = TestMapper.new.mapped_line(raw_line, cross_populate_map_mapping)
  rawtext = mapped[:rawtext]

  assert_equal 'Bob Smith', rawtext['referringclinicianname']
  assert_equal '', rawtext['referringcliniciancode']

  assert_equal 'Bob Smith', mapped['consultantname']
  assert_nil mapped['consultantcode']
end
446
+
447
# Four input columns, of which the second is blank: consultantcode is expected
# to take 'Pass' (the first column), not 'Fail' or 'Large Fail'.
test 'should create valid hash with used cross populate without map and priorities' do
  raw_line = ['Pass', '', 'Fail', 'Large Fail']
  mapped = TestMapper.new.mapped_line(raw_line, cross_populate_order_mapping)
  rawtext = mapped[:rawtext]

  assert_equal 'Pass', rawtext['referringclinicianname']
  assert_equal '', rawtext['referringcliniciancode']

  assert_equal 'Pass', mapped['consultantname']
  assert_equal 'Pass', mapped['consultantcode']
end
456
+
457
# Both raw columns are populated, yet both mapped fields are expected to hold
# the first column's value.
test 'should create valid hash with used cross populate without priority' do
  mapped = TestMapper.new.mapped_line(['Exists', 'Not'], cross_populate_no_priority)

  { 'columnoneraw' => 'Exists', 'columntworaw' => 'Not' }.each do |column, expected|
    assert_equal expected, mapped[:rawtext][column]
  end

  %w(columnone columntwo).each do |field|
    assert_equal 'Exists', mapped[field]
  end
end
465
+
466
# The same input line mapped through the two standard-mapping fixtures is
# expected to produce equal hashes.
test 'should create equal hashes with standard mapping' do
  # A fresh array per call, so neither mapping run can see the other's input.
  build_line = lambda { ['Smith', 'John F', 'male', '01234567'] }

  without_standard = TestMapper.new.mapped_line(build_line.call, standard_mapping_without)
  with_standard = TestMapper.new.mapped_line(build_line.call, standard_mapping_with)

  assert_equal without_standard, with_standard
end
475
+
476
# One input column is expected to feed two fields: 'surname' upper-cased and
# 'surname2' as supplied.
test 'should merge standard mapping and normal mapping' do
  mapped = TestMapper.new.mapped_line(['Smith'], standard_mapping_merge)

  { 'surname' => 'SMITH', 'surname2' => 'Smith' }.each do |field, expected|
    assert_equal expected, mapped[field]
  end
end
481
+
482
# The raw text is expected to be keyed by 'overriding_column_name' only; the
# 'standard_mapping_column_name' key must be absent.
test 'should merge standard mapping in correct order' do
  line_hash = TestMapper.new.mapped_line(['Smith'], standard_mapping_column)
  rawtext = line_hash[:rawtext]

  assert_equal 'Smith', rawtext['overriding_column_name']
  # refute_includes reports the collection and the offending key on failure,
  # unlike a bare `refute x.include?(y)`.
  refute_includes rawtext, 'standard_mapping_column_name'
end
487
+
488
# Mapping a line with the invalid_priorities fixture must raise RuntimeError.
test 'should raise duplicate priority exception' do
  assert_raise(RuntimeError) { TestMapper.new.mapped_line(['A', 'B'], invalid_priorities) }
end
493
+
494
# Mapping a line with the invalid_standard_mapping fixture must raise
# RuntimeError.
test 'should raise nonexistent standard mapping exception' do
  assert_raise(RuntimeError) { TestMapper.new.mapped_line(['A'], invalid_standard_mapping) }
end
499
+
500
# A blank first column is expected to be dropped from the joined 'address', so
# no leading separator appears.
test 'should join blank first field with compacting' do
  raw_line = ['', 'CB3 0DS']
  mapped = TestMapper.new.mapped_line(raw_line, joined_mapping_blank_start)

  assert_equal 'CB3 0DS', mapped['address']
end
504
+
505
# With the uncompacted fixture the blank first column is expected to be kept,
# leaving a leading comma in the joined 'address'.
test 'should join blank first field without compacting' do
  raw_line = ['', 'CB3 0DS']
  mapped = TestMapper.new.mapped_line(raw_line, joined_mapping_blank_start_uncompacted)

  assert_equal ',CB3 0DS', mapped['address']
end
509
+
510
# Five differently formatted spellings of the same day are each expected to
# map to 6 July 1927.
test 'line mapping should map date formats correctly' do
  expected_date = Date.new(1927, 7, 6)

  # Column name => raw incoming value, in the column order passed to the mapper.
  raw_by_column = {
    'dateofbirth'  => '06/07/1927',
    'receiveddate' => '19270706',
    'americandate' => '07/06/1927',
    'shortdate'    => '06/07/27',
    'funkydate'    => '06/JUL/27'
  }

  mapped = TestMapper.new.mapped_line(raw_by_column.values, date_mapping)

  raw_by_column.each_key do |column|
    assert_equal expected_date, mapped[column].to_date
  end
end
520
+
521
# A column mapped with the do_not_capture_column fixture must not appear in
# the raw text under the 'ignore_me' key.
test 'should ignore columns marked do not capture' do
  line_hash = TestMapper.new.mapped_line(['rubbish'], do_not_capture_column)

  # refute_includes names the collection and key in its failure message,
  # unlike a bare `refute x.include?(y)`.
  refute_includes line_hash[:rawtext], 'ignore_me'
end
525
+
526
# A Base64-encoded copy of hello_world.doc is expected to appear in the raw
# text as the document's plain text.
test 'should decode base64 encoded word document' do
  doc_path = @permanent_test_files.join('hello_world.doc')
  doc_bytes = File.binread(doc_path)
  payload = Base64.encode64(doc_bytes)

  mapped = TestMapper.new.mapped_line([payload], base64_mapping)
  rawtext = mapped[:rawtext]

  assert_equal 'Hello world, this is a word document', rawtext['base64']
end
532
+
533
# Calls decode_raw_value via send with the raw bytes of hello_world.doc and
# the :word_doc encoding, expecting the document's plain text back.
test 'should decode word.doc' do
  doc_path = @permanent_test_files.join('hello_world.doc')
  doc_bytes = File.binread(doc_path)

  decoded = TestMapper.new.send(:decode_raw_value, doc_bytes, :word_doc)

  assert_equal 'Hello world, this is a word document', decoded
end
539
+
540
# Calls read_word_stream via send with an open IO on hello_world.doc,
# expecting the document's plain text back.
test 'should read word.doc stream' do
  test_file = @permanent_test_files.join('hello_world.doc')

  # Block form of File.open so the handle is closed even if reading raises;
  # the original left the stream open for the garbage collector.
  # NOTE(review): the mode is kept as 'r'; a .doc is binary, so 'rb' may be
  # more portable - confirm against read_word_stream before changing.
  file_content = File.open(test_file, 'r') do |stream|
    TestMapper.new.send(:read_word_stream, stream)
  end

  assert_equal 'Hello world, this is a word document', file_content
end
545
+
546
# Mapping a line with the invalid_decode_mapping fixture must raise
# RuntimeError.
test 'should raise unknown encoding exception' do
  assert_raise(RuntimeError) { TestMapper.new.mapped_line(['A'], invalid_decode_mapping) }
end
551
+ end