ndr_import 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +14 -0
  3. data/.rubocop.yml +27 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +22 -0
  6. data/CODE_OF_CONDUCT.md +13 -0
  7. data/Gemfile +4 -0
  8. data/Guardfile +16 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +69 -0
  11. data/Rakefile +13 -0
  12. data/code_safety.yml +374 -0
  13. data/gemfiles/Gemfile.rails32 +5 -0
  14. data/gemfiles/Gemfile.rails32.lock +142 -0
  15. data/gemfiles/Gemfile.rails41 +5 -0
  16. data/gemfiles/Gemfile.rails41.lock +145 -0
  17. data/gemfiles/Gemfile.rails42 +5 -0
  18. data/gemfiles/Gemfile.rails42.lock +145 -0
  19. data/lib/ndr_import.rb +13 -0
  20. data/lib/ndr_import/csv_library.rb +40 -0
  21. data/lib/ndr_import/file/all.rb +8 -0
  22. data/lib/ndr_import/file/base.rb +76 -0
  23. data/lib/ndr_import/file/delimited.rb +86 -0
  24. data/lib/ndr_import/file/excel.rb +131 -0
  25. data/lib/ndr_import/file/pdf.rb +38 -0
  26. data/lib/ndr_import/file/registry.rb +50 -0
  27. data/lib/ndr_import/file/text.rb +52 -0
  28. data/lib/ndr_import/file/word.rb +30 -0
  29. data/lib/ndr_import/file/zip.rb +67 -0
  30. data/lib/ndr_import/helpers/file/delimited.rb +105 -0
  31. data/lib/ndr_import/helpers/file/excel.rb +181 -0
  32. data/lib/ndr_import/helpers/file/pdf.rb +29 -0
  33. data/lib/ndr_import/helpers/file/word.rb +27 -0
  34. data/lib/ndr_import/helpers/file/xml.rb +45 -0
  35. data/lib/ndr_import/helpers/file/zip.rb +44 -0
  36. data/lib/ndr_import/mapper.rb +220 -0
  37. data/lib/ndr_import/mapping_error.rb +5 -0
  38. data/lib/ndr_import/non_tabular/column_mapping.rb +73 -0
  39. data/lib/ndr_import/non_tabular/line.rb +46 -0
  40. data/lib/ndr_import/non_tabular/mapping.rb +35 -0
  41. data/lib/ndr_import/non_tabular/record.rb +99 -0
  42. data/lib/ndr_import/non_tabular/table.rb +193 -0
  43. data/lib/ndr_import/non_tabular_file_helper.rb +160 -0
  44. data/lib/ndr_import/standard_mappings.rb +23 -0
  45. data/lib/ndr_import/table.rb +179 -0
  46. data/lib/ndr_import/version.rb +4 -0
  47. data/ndr_import.gemspec +44 -0
  48. data/test/file/base_test.rb +54 -0
  49. data/test/file/delimited_test.rb +143 -0
  50. data/test/file/excel_test.rb +85 -0
  51. data/test/file/pdf_test.rb +35 -0
  52. data/test/file/registry_test.rb +60 -0
  53. data/test/file/text_test.rb +92 -0
  54. data/test/file/word_test.rb +35 -0
  55. data/test/file/zip_test.rb +47 -0
  56. data/test/helpers/file/delimited_test.rb +113 -0
  57. data/test/helpers/file/excel_test.rb +97 -0
  58. data/test/helpers/file/pdf_test.rb +26 -0
  59. data/test/helpers/file/word_test.rb +26 -0
  60. data/test/helpers/file/xml_test.rb +131 -0
  61. data/test/helpers/file/zip_test.rb +75 -0
  62. data/test/mapper_test.rb +551 -0
  63. data/test/non_tabular/mapping_test.rb +36 -0
  64. data/test/non_tabular/table_test.rb +510 -0
  65. data/test/non_tabular_file_helper_test.rb +501 -0
  66. data/test/readme_test.rb +53 -0
  67. data/test/resources/bomd.csv +3 -0
  68. data/test/resources/broken.csv +3 -0
  69. data/test/resources/filesystem_paths.yml +26 -0
  70. data/test/resources/flat_file.pdf +0 -0
  71. data/test/resources/flat_file.txt +27 -0
  72. data/test/resources/flat_file.yml +20 -0
  73. data/test/resources/hello_utf16be.txt +0 -0
  74. data/test/resources/hello_utf16le.txt +0 -0
  75. data/test/resources/hello_utf8.txt +2 -0
  76. data/test/resources/hello_windows.txt +2 -0
  77. data/test/resources/hello_world.doc +0 -0
  78. data/test/resources/hello_world.pdf +0 -0
  79. data/test/resources/hello_world.txt +2 -0
  80. data/test/resources/high_ascii_delimited.txt +2 -0
  81. data/test/resources/malformed.xml +6 -0
  82. data/test/resources/normal.csv +3 -0
  83. data/test/resources/normal.csv.zip +0 -0
  84. data/test/resources/normal_pipe.csv +3 -0
  85. data/test/resources/normal_thorn.csv +3 -0
  86. data/test/resources/not_a_pdf.pdf +0 -0
  87. data/test/resources/not_a_word_file.doc +0 -0
  88. data/test/resources/sample_xls.xls +0 -0
  89. data/test/resources/sample_xlsx.xlsx +0 -0
  90. data/test/resources/standard_mappings.yml +39 -0
  91. data/test/resources/txt_file_xls_extension.xls +1 -0
  92. data/test/resources/txt_file_xlsx_extension.xlsx +1 -0
  93. data/test/resources/utf-16be_xml.xml +0 -0
  94. data/test/resources/utf-16be_xml_with_declaration.xml +0 -0
  95. data/test/resources/utf-16le_xml.xml +0 -0
  96. data/test/resources/utf-8_xml.xml +9 -0
  97. data/test/resources/windows-1252_xml.xml +9 -0
  98. data/test/resources/windows.csv +5 -0
  99. data/test/resources/xlsx_file_xls_extension.xls +0 -0
  100. data/test/standard_mappings_test.rb +22 -0
  101. data/test/table_test.rb +288 -0
  102. data/test/test_helper.rb +13 -0
  103. metadata +443 -0
@@ -0,0 +1,97 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/excel'
3
+
4
+ # Excel file helper tests
5
+ class ExcelTest < ActiveSupport::TestCase
6
+ # This is a test importer class to test the excel file helper mixin
7
+ class TestImporter
8
+ include NdrImport::Helpers::File::Excel
9
+ end
10
+
11
+ def setup
12
+ @permanenttestfiles = SafePath.new('permanent_test_files')
13
+ @importer = TestImporter.new
14
+ end
15
+
16
+ test 'read_excel_file helper should read xls file' do
17
+ file_content = @importer.send(:read_excel_file, @permanenttestfiles.join('sample_xls.xls'))
18
+ assert_equal file_content.count, 2
19
+ assert_equal file_content.first, %w(1A 1B)
20
+ end
21
+
22
+ test 'read_excel_file helper should read xlsx file' do
23
+ file_content = @importer.send(:read_excel_file, @permanenttestfiles.join('sample_xlsx.xlsx'))
24
+ assert_equal file_content.count, 2
25
+ assert_equal file_content.first, %w(1A 1B)
26
+ end
27
+
28
+ test 'read_excel_file helper should read xlsx file with the incorrect xls extension' do
29
+ file_path = @permanenttestfiles.join('xlsx_file_xls_extension.xls')
30
+ file_content = @importer.send(:read_excel_file, file_path)
31
+ assert_equal file_content.count, 2
32
+ assert_equal file_content.first, %w(1A 1B)
33
+
34
+ SafeFile.delete @permanenttestfiles.join('xlsx_file_xls_extension_amend.xlsx')
35
+ end
36
+
37
+ test 'read_excel_file helper should handle exceptions' do
38
+ # txt file
39
+ SafeFile.open(@permanenttestfiles.join('temp.txt'), 'w') { |f| f.write 'dummy line' }
40
+ assert_raises RuntimeError do
41
+ @importer.send(:read_excel_file, @permanenttestfiles.join('temp.txt'))
42
+ end
43
+
44
+ # .txt file in .xls extension
45
+ File.rename @permanenttestfiles.join('temp.txt'), @permanenttestfiles.join('temp.xls')
46
+ assert_raises RuntimeError do
47
+ @importer.send(:read_excel_file, @permanenttestfiles.join('temp.xls'))
48
+ end
49
+
50
+ # .txt file in .xlsx extension
51
+ File.rename @permanenttestfiles.join('temp.xls'), @permanenttestfiles.join('temp.xlsx')
52
+ assert_raises RuntimeError do
53
+ @importer.send(:read_excel_file, @permanenttestfiles.join('temp.xlsx'))
54
+ end
55
+
56
+ SafeFile.delete @permanenttestfiles.join('temp.xlsx')
57
+ SafeFile.delete @permanenttestfiles.join('temp_amend.xlsx')
58
+ end
59
+
60
+ test 'excel_tables helper should read xls table correctly' do
61
+ table = @importer.send(:excel_tables, @permanenttestfiles.join('sample_xls.xls'))
62
+ table.each do |tablename, sheet|
63
+ assert_equal 'Sheet1', tablename
64
+ assert_equal %w(1A 1B), sheet.first
65
+ end
66
+ end
67
+
68
+ test 'excel_tables helper should read xlsx table correctly' do
69
+ table = @importer.send(:excel_tables, @permanenttestfiles.join('sample_xlsx.xlsx'))
70
+ table.each do |tablename, sheet|
71
+ assert_equal 'Sheet1', tablename
72
+ assert_equal %w(1A 1B), sheet.first
73
+ end
74
+ end
75
+
76
+ def test_each_excel_table_should_be_deprecated
77
+ original_stderr = $stderr
78
+ $stderr = StringIO.new
79
+
80
+ table = @importer.send(:each_excel_table, @permanenttestfiles.join('sample_xlsx.xlsx'))
81
+ table.each do |tablename, sheet|
82
+ assert_equal 'Sheet1', tablename
83
+ assert_equal %w(1A 1B), sheet.first
84
+ end
85
+
86
+ assert_match(/\A\[warning\] each_excel_table will be deprecated/, $stderr.string)
87
+ ensure
88
+ $stderr = original_stderr
89
+ end if Gem::Requirement.new('< 3.0.0').satisfied_by?(Gem::Version.new(NdrImport::VERSION))
90
+
91
+ def test_deprecated_methods_removed_in_v3
92
+ refute @importer.protected_methods.include?(:each_excel_table), 'should be removed in v3.0.0'
93
+ refute @importer.private_methods.include?(:each_excel_row), 'should be removed in v3.0.0'
94
+ refute @importer.private_methods.include?(:each_xls_row), 'should be removed in v3.0.0'
95
+ refute @importer.private_methods.include?(:each_xlsx_row), 'should be removed in v3.0.0'
96
+ end if Gem::Requirement.new('>= 3.0.0').satisfied_by?(Gem::Version.new(NdrImport::VERSION))
97
+ end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/pdf'
3
+
4
+ # PDF file helper tests
5
+ class PdfTest < ActiveSupport::TestCase
6
+ # This is a test importer class to test the PDF file helper mixin
7
+ class TestImporter
8
+ include NdrImport::Helpers::File::Pdf
9
+ end
10
+
11
+ def setup
12
+ @permanent_test_files = SafePath.new('permanent_test_files')
13
+ @importer = TestImporter.new
14
+ end
15
+
16
+ test 'read_pdf_file helper should read pdf file' do
17
+ file_content = @importer.send(:read_pdf_file, @permanent_test_files.join('hello_world.pdf'))
18
+ assert_equal file_content, ['Hello World']
19
+ end
20
+
21
+ test 'read_pdf_file helper should raise exception on invalid pdf file' do
22
+ assert_raises RuntimeError do
23
+ @importer.send(:read_pdf_file, @permanent_test_files.join('not_a_pdf.pdf'))
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/word'
3
+
4
+ # Word file helper tests
5
+ class WordTest < ActiveSupport::TestCase
6
+ # This is a test importer class to test the Word file helper mixin
7
+ class TestImporter
8
+ include NdrImport::Helpers::File::Word
9
+ end
10
+
11
+ def setup
12
+ @permanent_test_files = SafePath.new('permanent_test_files')
13
+ @importer = TestImporter.new
14
+ end
15
+
16
+ test 'read_word_file helper should read word file' do
17
+ file_content = @importer.send(:read_word_file, @permanent_test_files.join('hello_world.doc'))
18
+ assert_equal file_content, ['Hello world, this is a word document']
19
+ end
20
+
21
+ test 'read_word_file helper should raise exception on invalid word file' do
22
+ assert_raises RuntimeError do
23
+ @importer.send(:read_word_file, @permanent_test_files.join('not_a_word_file.doc'))
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,131 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/xml'
3
+ require 'nokogiri'
4
+
5
+ # XML file helper tests
6
+ class XmlTest < ActiveSupport::TestCase
7
+ # This is a test importer class to test the XML file helper mixin
8
+ class TestImporter
9
+ include NdrImport::Helpers::File::Xml
10
+ end
11
+
12
+ def setup
13
+ @home = SafePath.new('test_space_rw')
14
+ @permanent_test_files = SafePath.new('permanent_test_files')
15
+ @importer = TestImporter.new
16
+ end
17
+
18
+ test 'import_xml_file should handle incoming UTF-8' do
19
+ doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-8_xml.xml'))
20
+ greek = doc.xpath('//letter').map(&:text).join
21
+
22
+ assert_equal 'UTF-8', doc.encoding
23
+
24
+ assert greek.valid_encoding?
25
+ assert_equal Encoding.find('UTF-8'), greek.encoding
26
+ assert_equal 2, greek.chars.to_a.length
27
+ assert_equal [206, 177, 206, 178], greek.bytes.to_a # 2-bytes each for alpha and beta
28
+ end
29
+
30
+ test 'import_xml_file should handle incoming UTF-16 (big endian)' do
31
+ doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-16be_xml.xml'))
32
+ greek = doc.xpath('//letter').map(&:text).join
33
+
34
+ assert_equal 'UTF-8', doc.encoding
35
+
36
+ assert greek.valid_encoding?
37
+ assert_equal Encoding.find('UTF-8'), greek.encoding
38
+ assert_equal 2, greek.chars.to_a.length
39
+ assert_equal [206, 177, 206, 178], greek.bytes.to_a # 2-bytes each for alpha and beta
40
+ end
41
+
42
+ test 'import_xml_file should handle incoming UTF-16 (little endian)' do
43
+ doc = @importer.send(:read_xml_file, @permanent_test_files.join('utf-16le_xml.xml'))
44
+ greek = doc.xpath('//letter').map(&:text).join
45
+
46
+ assert_equal 'UTF-8', doc.encoding
47
+
48
+ assert greek.valid_encoding?
49
+ assert_equal Encoding.find('UTF-8'), greek.encoding
50
+ assert_equal 2, greek.chars.to_a.length
51
+ assert_equal [206, 177, 206, 178], greek.bytes.to_a # 2 bytes each for alpha and beta
52
+ end
53
+
54
+ test 'import_xml_file should handle incoming UTF-16 with declaration' do
55
+ doc = @importer.send(:read_xml_file,
56
+ @permanent_test_files.join('utf-16be_xml_with_declaration.xml'))
57
+ greek = doc.xpath('//letter').map(&:text).join
58
+
59
+ assert greek.valid_encoding?
60
+ assert_equal Encoding.find('UTF-8'), greek.encoding
61
+ assert_equal 2, greek.chars.to_a.length
62
+ assert_equal [206, 177, 206, 178], greek.bytes.to_a # 2 bytes each for alpha and beta
63
+
64
+ # The document should be UTF-8, and we shouldn't
65
+ # get encoding mismatches when interrogating it:
66
+ assert_equal 'UTF-8', doc.encoding
67
+ assert_equal 1, doc.css('note[id=alpha]').length
68
+ end
69
+
70
+ test 'import_xml_file should handle incoming Windows-1252' do
71
+ doc = @importer.send(:read_xml_file, @permanent_test_files.join('windows-1252_xml.xml'))
72
+ punct = doc.xpath('//letter').map(&:text).join
73
+
74
+ assert_equal 'UTF-8', doc.encoding
75
+
76
+ assert punct.valid_encoding?
77
+ assert_equal Encoding.find('UTF-8'), punct.encoding
78
+ assert_equal 2, punct.chars.to_a.length
79
+ assert_equal [226, 128, 153, 226, 128, 147], punct.bytes.to_a # 3 bytes each for apostrophe and dash
80
+ end
81
+
82
+ test 'import_xml_file with malformed XML file' do
83
+ assert_raises Nokogiri::XML::SyntaxError do
84
+ @importer.send(:read_xml_file, @permanent_test_files.join('malformed.xml'))
85
+ end
86
+ end
87
+
88
+ test '.import_xml_file should reject non safe path arguments' do
89
+ assert_raises ArgumentError do
90
+ @importer.send(:read_xml_file, @home.join('simple.xml').to_s)
91
+ end
92
+ end
93
+
94
+ test '.import_xml_file should accept safepath' do
95
+ builder = Nokogiri::XML::Builder.new do |xml|
96
+ xml.root do
97
+ xml.note(:id => 1) do
98
+ xml.time 'Thu Dec 13 13:12:00 UTC 2012'
99
+ xml.title 'Note 1'
100
+ xml.body 'Note 1 body blabla bla'
101
+ end
102
+ xml.note(:id => 2) do
103
+ xml.time 'Thu Dec 14 12:11:00 UTC 2012'
104
+ xml.title 'note 2'
105
+ xml.body 'note 2 body blablabala'
106
+ end
107
+ end
108
+ end
109
+ SafeFile.open(@home.join('simple.xml'), 'w') { |f| f.write builder.to_xml }
110
+
111
+ doc = @importer.send(:read_xml_file, @home.join('simple.xml'))
112
+
113
+ assert_equal 1, doc.children.reject { |c| c.text =~ /\A\n *\Z/ }.length
114
+ assert_equal 'root', doc.children.reject { |c| c.text =~ /\A\n *\Z/ }[0].name
115
+ assert_equal 2, doc.
116
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
117
+ children.reject { |c| c.text =~ /\A\n *\Z/ }.length
118
+ assert_equal 'note', doc.children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
119
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].name
120
+ assert_equal 3, doc.
121
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
122
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
123
+ children.reject { |c| c.text =~ /\A\n *\Z/ }.length
124
+ assert_equal 'Thu Dec 13 13:12:00 UTC 2012', doc.
125
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
126
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].
127
+ children.reject { |c| c.text =~ /\A\n *\Z/ }[0].text
128
+
129
+ SafeFile.delete @home.join('simple.xml')
130
+ end
131
+ end
@@ -0,0 +1,75 @@
1
+ require 'test_helper'
2
+ require 'ndr_import/helpers/file/zip'
3
+ require 'zip'
4
+
5
+ # Zip file helper tests
6
+ class ZipTest < ActiveSupport::TestCase
7
+ # This is a test importer class to test the Zip file helper mixin
8
+ class TestImporter
9
+ include NdrImport::Helpers::File::Zip
10
+ end
11
+
12
+ def setup
13
+ @home = SafePath.new('test_space_rw')
14
+ @permanent_test_files = SafePath.new('permanent_test_files')
15
+ @importer = TestImporter.new
16
+ end
17
+
18
+ test '.unzip should reject non SafePath arguments' do
19
+ zip = @home.join('imaginary.zip')
20
+
21
+ assert_raises ArgumentError do
22
+ @importer.send(:unzip_file, zip.to_s, @home.to_s)
23
+ end
24
+
25
+ assert_raises ArgumentError do
26
+ @importer.send(:unzip_file, zip.to_s, @home)
27
+ end
28
+
29
+ assert_raises ArgumentError do
30
+ @importer.send(:unzip_file, zip, @home.to_s)
31
+ end
32
+ end
33
+
34
+ test '.unzip unzip zip file' do
35
+ zip_name = @home.join('test.zip')
36
+
37
+ files = [
38
+ @home.join('f1'),
39
+ @home.join('f2'),
40
+ @home.join('f3')
41
+ ]
42
+
43
+ files.each do |fname|
44
+ File.open(fname, 'w') { |f| f.write "test #{fname}" }
45
+ end
46
+
47
+ ::Zip::File.open(zip_name, Zip::File::CREATE) do |zipfile|
48
+ files.each do |fname|
49
+ zipfile.add(File.basename(fname.to_s), fname.to_s)
50
+ end
51
+ end
52
+
53
+ File.delete(*files)
54
+
55
+ files.each do |fname|
56
+ assert !File.exist?(fname)
57
+ end
58
+
59
+ assert File.exist?(zip_name)
60
+ dest = @home.join('unziped')
61
+
62
+ @importer.send(:unzip_file, zip_name, dest)
63
+
64
+ files.each do |fname|
65
+ assert File.exist?(dest.join(File.basename(fname)))
66
+ end
67
+
68
+ files.each do |fname|
69
+ File.delete(dest.join(File.basename(fname)))
70
+ end
71
+
72
+ File.delete(zip_name)
73
+ FileUtils.rm_r(dest)
74
+ end
75
+ end
@@ -0,0 +1,551 @@
1
+ require 'test_helper'
2
+
3
+ # expose private methods
4
+ class TestMapper
5
+ include NdrImport::Mapper
6
+
7
+ public :fixed_width_columns, :mapped_line, :mapped_value, :replace_before_mapping
8
+
9
+ # TODO: test fixed_width_columns
10
+ end
11
+
12
+ # This tests the NdrImport::Mapper class
13
+ class MapperTest < ActiveSupport::TestCase
14
+ def setup
15
+ @permanent_test_files = SafePath.new('permanent_test_files')
16
+ end
17
+
18
+ format_mapping = { 'format' => 'dd/mm/yyyy' }
19
+ format_mapping_yyyymmdd = { 'format' => 'yyyymmdd' }
20
+ clean_name_mapping = { 'clean' => :name }
21
+ clean_ethniccategory_mapping = { 'clean' => :ethniccategory }
22
+ clean_icd_mapping = { 'clean' => :code_icd }
23
+ clean_opcs_mapping = { 'clean' => :code_opcs }
24
+ map_mapping = { 'map' => { 'A' => '1' } }
25
+ replace_mapping = { 'replace' => { '.0' => '' } }
26
+ daysafter_mapping = { 'daysafter' => '2012-05-16' }
27
+ # TODO: match_mapping = {}
28
+
29
+ simple_mapping = [{ 'column' => 'patient address', 'mappings' => ['field' => 'address'] }]
30
+
31
+ simple_mapping_with_clean_opcs = YAML.load <<-YML
32
+ - column: primaryprocedures
33
+ mappings:
34
+ - field: primaryprocedures
35
+ clean: :code_opcs
36
+ YML
37
+
38
+ join_mapping = YAML.load <<-YML
39
+ - column: forename1
40
+ mappings:
41
+ - field: forenames
42
+ order: 1
43
+ join: " "
44
+ - column: forename2
45
+ mappings:
46
+ - field: forenames
47
+ order: 2
48
+ YML
49
+
50
+ join_compact_mapping = YAML.load <<-YML
51
+ - column: forename1
52
+ mappings:
53
+ - field: forenames
54
+ order: 1
55
+ join: " "
56
+ compact: false
57
+ - column: forename2
58
+ mappings:
59
+ - field: forenames
60
+ order: 2
61
+ YML
62
+
63
+ unused_mapping = [{ 'column' => 'extra', 'rawtext_name' => 'extra' }]
64
+
65
+ cross_populate_mapping = YAML.load <<-YML
66
+ - column: referringclinicianname
67
+ mappings:
68
+ - field: consultantname
69
+ - field: consultantcode
70
+ priority: 2
71
+ - column: referringcliniciancode
72
+ mappings:
73
+ - field: consultantcode
74
+ YML
75
+
76
+ cross_populate_replace_mapping = YAML.load <<-YML
77
+ - column: referringclinicianname
78
+ mappings:
79
+ - field: consultantname
80
+ - field: consultantcode
81
+ priority: 2
82
+ replace:
83
+ ? !ruby/regexp /^BOB FOSSIL$/i
84
+ : "ROBERT FOSSIL"
85
+ - column: referringcliniciancode
86
+ mappings:
87
+ - field: consultantcode
88
+ priority: 1
89
+ YML
90
+
91
+ cross_populate_map_mapping = YAML.load <<-YML
92
+ - column: referringclinicianname
93
+ mappings:
94
+ - field: consultantname
95
+ - field: consultantcode
96
+ priority: 2
97
+ map:
98
+ "Bob Fossil": "C5678"
99
+ - column: referringcliniciancode
100
+ mappings:
101
+ - field: consultantcode
102
+ priority: 1
103
+ YML
104
+
105
+ cross_populate_order_mapping = YAML.load <<-YML
106
+ - column: referringclinicianname
107
+ mappings:
108
+ - field: consultantname
109
+ - field: consultantcode
110
+ priority: 2
111
+ - column: referringcliniciancode
112
+ mappings:
113
+ - field: consultantcode
114
+ priority: 1
115
+ - column: somecolumn
116
+ mappings:
117
+ - field: consultantcode
118
+ priority: 5
119
+ - column: anothercolumn
120
+ mappings:
121
+ - field: consultantcode
122
+ priority: 10
123
+ YML
124
+
125
+ cross_populate_no_priority = YAML.load <<-YML
126
+ - column: columnoneraw
127
+ mappings:
128
+ - field: columnone
129
+ - field: columntwo
130
+ - column: columntworaw
131
+ mappings:
132
+ - field: columntwo
133
+ priority: 5
134
+ YML
135
+
136
+ standard_mapping_without = YAML.load <<-YML
137
+ - column: surname
138
+ rawtext_name: surname
139
+ mappings:
140
+ - field: surname
141
+ clean: :name
142
+ - column: forename
143
+ rawtext_name: forenames
144
+ mappings:
145
+ - field: forenames
146
+ clean: :name
147
+ - column: sex
148
+ rawtext_name: sex
149
+ mappings:
150
+ - field: sex
151
+ clean: :sex
152
+ - column: nhs_no
153
+ rawtext_name: nhsnumber
154
+ mappings:
155
+ - field: nhsnumber
156
+ clean: :nhsnumber
157
+ YML
158
+
159
+ standard_mapping_with = YAML.load <<-YML
160
+ - standard_mapping: surname
161
+ - column: forename
162
+ standard_mapping: forenames
163
+ - standard_mapping: sex
164
+ - column: nhs_no
165
+ standard_mapping: nhsnumber
166
+ YML
167
+
168
+ standard_mapping_merge = YAML.load <<-YML
169
+ - column: surname
170
+ standard_mapping: surname
171
+ mappings:
172
+ - field: surname2
173
+ YML
174
+
175
+ standard_mapping_column = YAML.load <<-YML
176
+ - column: overriding_column_name
177
+ standard_mapping: test
178
+ YML
179
+
180
+ invalid_priorities = YAML.load <<-YML
181
+ - column: columnoneraw
182
+ mappings:
183
+ - field: columnone
184
+ - field: columntwo
185
+ priority: 5
186
+ - column: columntworaw
187
+ mappings:
188
+ - field: columntwo
189
+ priority: 5
190
+ YML
191
+
192
+ invalid_standard_mapping = YAML.load <<-YML
193
+ - column: surname
194
+ standard_mapping: surnames
195
+ YML
196
+
197
+ joined_mapping_blank_start = YAML.load <<-YML
198
+ - column: addressoneraw
199
+ mappings:
200
+ - field: address
201
+ join: ","
202
+ order: 1
203
+ - column: postcode
204
+ mappings:
205
+ - field: address
206
+ order: 2
207
+ YML
208
+
209
+ joined_mapping_blank_start_uncompacted = YAML.load <<-YML
210
+ - column: addressoneraw
211
+ mappings:
212
+ - field: address
213
+ join: ","
214
+ order: 1
215
+ compact: false
216
+ - column: postcode
217
+ mappings:
218
+ - field: address
219
+ order: 2
220
+ YML
221
+
222
+ date_mapping = YAML.load <<-YML
223
+ - column: birth_date
224
+ rawtext_name: dateofbirth
225
+ mappings:
226
+ - field: dateofbirth
227
+ format: dd/mm/yyyy
228
+ - column: received_date
229
+ rawtext_name: receiveddate
230
+ mappings:
231
+ - field: receiveddate
232
+ format: yyyymmdd
233
+ - column: american_date
234
+ rawtext_name: americandate
235
+ mappings:
236
+ - field: americandate
237
+ format: mm/dd/yyyy
238
+ - column: short_date
239
+ rawtext_name: shortdate
240
+ mappings:
241
+ - field: shortdate
242
+ format: dd/mm/yy
243
+ - column: funky_date
244
+ rawtext_name: funkydate
245
+ mappings:
246
+ - field: funkydate
247
+ format: dd/mmm/yy
248
+ YML
249
+
250
+ do_not_capture_column = YAML.load <<-YML
251
+ - column: ignore_me
252
+ do_not_capture: true
253
+ YML
254
+
255
+ base64_mapping = YAML.load <<-YML
256
+ - column: base64
257
+ decode:
258
+ - :base64
259
+ - :word_doc
260
+ YML
261
+
262
+ invalid_decode_mapping = YAML.load <<-YML
263
+ - column: column_name
264
+ decode:
265
+ - :invalid_encoding
266
+ YML
267
+
268
+ test 'map should return a number' do
269
+ assert_equal '1', TestMapper.new.mapped_value('A', map_mapping)
270
+ end
271
+
272
+ test 'map should return nil' do
273
+ assert_nil TestMapper.new.mapped_value('B', map_mapping)
274
+ end
275
+
276
+ test 'map should return correct date format' do
277
+ assert_equal Date.new(2011, 1, 25), TestMapper.new.mapped_value('25/01/2011', format_mapping)
278
+ assert_equal Date.new(2011, 1, 25),
279
+ TestMapper.new.mapped_value('20110125', format_mapping_yyyymmdd)
280
+ end
281
+
282
+ test 'map should return incorrect date format' do
283
+ assert_not_equal Date.new(2011, 3, 4),
284
+ TestMapper.new.mapped_value('03/04/2011', format_mapping)
285
+ end
286
+
287
+ test 'map should return nil date format' do
288
+ assert_nil TestMapper.new.mapped_value('03/25/2011', format_mapping)
289
+ end
290
+
291
+ test 'map should replace value' do
292
+ value = '2.0'
293
+ TestMapper.new.replace_before_mapping(value, replace_mapping)
294
+ assert_equal '2', value
295
+ end
296
+
297
+ test 'map should not alter value' do
298
+ value = '2.1'
299
+ TestMapper.new.replace_before_mapping(value, replace_mapping)
300
+ assert_equal '2.1', value
301
+ end
302
+
303
+ test 'map should clean name' do
304
+ assert_equal 'ANNABELLE SMITH',
305
+ TestMapper.new.mapped_value('anna.belle,smith', clean_name_mapping)
306
+ end
307
+
308
+ test 'map should clean ethenic category' do
309
+ assert_equal 'M', TestMapper.new.mapped_value('1', clean_ethniccategory_mapping)
310
+ assert_equal 'X', TestMapper.new.mapped_value('99', clean_ethniccategory_mapping)
311
+ assert_equal 'A', TestMapper.new.mapped_value('A', clean_ethniccategory_mapping)
312
+ assert_equal 'INVALID', TestMapper.new.mapped_value('InValiD', clean_ethniccategory_mapping)
313
+ end
314
+
315
+ test 'map should clean icd code' do
316
+ assert_equal 'C34.3 R93.2 Z51.5',
317
+ TestMapper.new.mapped_value('C34.3,R93.2,Z51.5', clean_icd_mapping)
318
+ end
319
+
320
+ test 'map should clean opcs code' do
321
+ assert_equal 'U212 Y973', TestMapper.new.mapped_value('U212,Y973,X1', clean_opcs_mapping)
322
+ assert_equal '', TestMapper.new.mapped_value('98', clean_opcs_mapping)
323
+ assert_equal '', TestMapper.new.mapped_value('TooLong', clean_opcs_mapping)
324
+ assert_equal nil, TestMapper.new.mapped_value('', clean_opcs_mapping)
325
+ assert_equal 'ABCD', TestMapper.new.mapped_value('AbcD', clean_opcs_mapping)
326
+ assert_equal '1234', TestMapper.new.mapped_value('1234', clean_opcs_mapping)
327
+ end
328
+
329
+ test 'should return correct date format for date fields with daysafter' do
330
+ assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value(2, daysafter_mapping)
331
+ assert_equal Date.new(2012, 5, 18), TestMapper.new.mapped_value('2', daysafter_mapping)
332
+ assert_equal Date.new(2012, 5, 14), TestMapper.new.mapped_value(-2, daysafter_mapping)
333
+ assert_equal Date.new(2012, 5, 14), TestMapper.new.mapped_value('-2', daysafter_mapping)
334
+ assert_equal Date.new(2012, 5, 16), TestMapper.new.mapped_value(0, daysafter_mapping)
335
+ assert_equal 'String', TestMapper.new.mapped_value('String', daysafter_mapping)
336
+ assert_equal '', TestMapper.new.mapped_value('', daysafter_mapping)
337
+ assert_nil TestMapper.new.mapped_value(nil, daysafter_mapping)
338
+ assert_equal Date.new(2057, 8, 23), TestMapper.new.mapped_value(16_535, daysafter_mapping)
339
+ # Answer independently checked http://www.wolframalpha.com/input/?i=2012-05-16+%2B+9379+days
340
+ assert_equal Date.new(2038, 1, 19), TestMapper.new.mapped_value(9379, daysafter_mapping)
341
+ assert_equal Date.new(1946, 5, 11),
342
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '1900-02-01')
343
+ assert_equal Date.new(2014, 4, 8),
344
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '1967-12-31')
345
+ assert_equal Date.new(2046, 4, 9),
346
+ TestMapper.new.mapped_value(16_900, 'daysafter' => '2000-01-01')
347
+ end
348
+
349
+ test 'line mapping should create valid hash' do
350
+ line_hash = TestMapper.new.mapped_line(['1 test road, testtown'], simple_mapping)
351
+ assert_equal '1 test road, testtown', line_hash['address']
352
+ assert_equal '1 test road, testtown', line_hash[:rawtext]['patient address']
353
+ end
354
+
355
+ test 'line mapping should create valid hash with blank cleaned value' do
356
+ assert_equal '', TestMapper.new.mapped_value('98', clean_opcs_mapping)
357
+ line_hash = TestMapper.new.mapped_line(['98'], simple_mapping_with_clean_opcs)
358
+ assert_equal nil, line_hash['primaryprocedures']
359
+ assert_equal '98', line_hash[:rawtext]['primaryprocedures']
360
+ end
361
+
362
+ test 'line mapping should create valid hash with join' do
363
+ line_hash = TestMapper.new.mapped_line(%w(Catherine Elizabeth), join_mapping)
364
+ assert_equal 'Catherine Elizabeth', line_hash['forenames']
365
+ assert_equal 'Catherine', line_hash[:rawtext]['forename1']
366
+ assert_equal 'Elizabeth', line_hash[:rawtext]['forename2']
367
+ end
368
+
369
+ test 'line mapping should create valid hash with rawtext only' do
370
+ line_hash = TestMapper.new.mapped_line(['otherinfo'], unused_mapping)
371
+ assert_equal 1, line_hash.length
372
+ assert_equal 'otherinfo', line_hash[:rawtext]['extra']
373
+ end
374
+
375
+ test 'should create valid hash with unused cross populate' do
376
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', 'C1234'], cross_populate_mapping)
377
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
378
+ assert_equal 'C1234', line_hash[:rawtext]['referringcliniciancode']
379
+
380
+ assert_equal 'Bob Fossil', line_hash['consultantname']
381
+ assert_equal 'C1234', line_hash['consultantcode']
382
+ end
383
+
384
+ test 'should create valid hash with used cross populate' do
385
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_mapping)
386
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
387
+ assert_equal '', line_hash[:rawtext]['referringcliniciancode']
388
+
389
+ assert_equal 'Bob Fossil', line_hash['consultantname']
390
+ assert_equal 'Bob Fossil', line_hash['consultantcode']
391
+ end
392
+
393
+ test 'should create valid hash with unused cross populate replace' do
394
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', 'C1234'], cross_populate_replace_mapping)
395
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
396
+ assert_equal 'C1234', line_hash[:rawtext]['referringcliniciancode']
397
+
398
+ assert_equal 'Bob Fossil', line_hash['consultantname']
399
+ assert_equal 'C1234', line_hash['consultantcode']
400
+ end
401
+
402
+ test 'should create valid hash with used cross populate with replace' do
403
+ line_hash = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_replace_mapping)
404
+ assert_equal 'Bob Fossil', line_hash[:rawtext]['referringclinicianname']
405
+ assert_equal '', line_hash[:rawtext]['referringcliniciancode']
406
+
407
+ assert_equal 'Bob Fossil', line_hash['consultantname']
408
+ assert_equal 'ROBERT FOSSIL', line_hash['consultantcode']
409
+ end
410
+
411
+ test 'should create valid hash with used cross populate without replace' do
412
+ line_hash = TestMapper.new.mapped_line(['Bob Smith', ''], cross_populate_replace_mapping)
413
+ assert_equal 'Bob Smith', line_hash[:rawtext]['referringclinicianname']
414
+ assert_equal '', line_hash[:rawtext]['referringcliniciancode']
415
+
416
+ assert_equal 'Bob Smith', line_hash['consultantname']
417
+ assert_equal 'Bob Smith', line_hash['consultantcode']
418
+ end
419
+
420
test 'should create valid hash with unused cross populate map' do
  # A code is present in the second column, so consultantcode is
  # expected to be that code ('C1234').
  mapper = TestMapper.new
  mapped = mapper.mapped_line(['Bob Fossil', 'C1234'], cross_populate_map_mapping)
  raw = mapped[:rawtext]

  assert_equal 'Bob Fossil', raw['referringclinicianname']
  assert_equal 'C1234', raw['referringcliniciancode']

  assert_equal 'Bob Fossil', mapped['consultantname']
  assert_equal 'C1234', mapped['consultantcode']
end
428
+
429
test 'should create valid hash with used cross populate with map' do
  # The code column is blank; the expected consultantcode ('C5678')
  # differs from both raw inputs.
  mapped = TestMapper.new.mapped_line(['Bob Fossil', ''], cross_populate_map_mapping)
  raw = mapped[:rawtext]

  assert_equal 'Bob Fossil', raw['referringclinicianname']
  assert_equal '', raw['referringcliniciancode']

  assert_equal 'Bob Fossil', mapped['consultantname']
  assert_equal 'C5678', mapped['consultantcode']
end
437
+
438
test 'should create valid hash with used cross populate without map' do
  # The code column is blank and, for 'Bob Smith', consultantcode is
  # expected to be nil.
  mapped = TestMapper.new.mapped_line(['Bob Smith', ''], cross_populate_map_mapping)
  raw = mapped[:rawtext]

  assert_equal 'Bob Smith', raw['referringclinicianname']
  assert_equal '', raw['referringcliniciancode']

  assert_equal 'Bob Smith', mapped['consultantname']
  assert_nil mapped['consultantcode']
end
446
+
447
test 'should create valid hash with used cross populate without map and priorities' do
  # Four columns are supplied; the assertions expect the first value
  # ('Pass') in both mapped fields, never 'Fail' or 'Large Fail'.
  incoming = ['Pass', '', 'Fail', 'Large Fail']
  mapped = TestMapper.new.mapped_line(incoming, cross_populate_order_mapping)
  raw = mapped[:rawtext]

  assert_equal 'Pass', raw['referringclinicianname']
  assert_equal '', raw['referringcliniciancode']

  assert_equal 'Pass', mapped['consultantname']
  assert_equal 'Pass', mapped['consultantcode']
end
456
+
457
test 'should create valid hash with used cross populate without priority' do
  # Both raw values are retained, while both mapped columns are
  # expected to hold the first value ('Exists').
  mapped = TestMapper.new.mapped_line(['Exists', 'Not'], cross_populate_no_priority)
  raw = mapped[:rawtext]

  assert_equal 'Exists', raw['columnoneraw']
  assert_equal 'Not', raw['columntworaw']

  assert_equal 'Exists', mapped['columnone']
  assert_equal 'Exists', mapped['columntwo']
end
465
+
466
test 'should create equal hashes with standard mapping' do
  # The same row is mapped through both fixtures and the results must
  # be equal. A fresh array is built per call, as the two calls must
  # not share input objects.
  row = -> { ['Smith', 'John F', 'male', '01234567'] }

  without_standard = TestMapper.new.mapped_line(row.call, standard_mapping_without)
  with_standard = TestMapper.new.mapped_line(row.call, standard_mapping_with)

  assert_equal without_standard, with_standard
end
475
+
476
test 'should merge standard mapping and normal mapping' do
  # One input value is expected in two fields: upper-cased in
  # 'surname' and unchanged in 'surname2'.
  mapped = TestMapper.new.mapped_line(['Smith'], standard_mapping_merge)

  { 'surname' => 'SMITH', 'surname2' => 'Smith' }.each do |field, expected|
    assert_equal expected, mapped[field]
  end
end
481
+
482
test 'should merge standard mapping in correct order' do
  # The raw text must be keyed by the overriding column name only;
  # the standard mapping's own column name must be absent.
  raw = TestMapper.new.mapped_line(['Smith'], standard_mapping_column)[:rawtext]

  assert_equal 'Smith', raw['overriding_column_name']
  refute_includes raw, 'standard_mapping_column_name'
end
487
+
488
test 'should raise duplicate priority exception' do
  # Mapping with the invalid_priorities fixture is expected to raise
  # a RuntimeError.
  mapper = TestMapper.new

  assert_raise(RuntimeError) { mapper.mapped_line(['A', 'B'], invalid_priorities) }
end
493
+
494
test 'should raise nonexistent standard mapping exception' do
  # Mapping with the invalid_standard_mapping fixture is expected to
  # raise a RuntimeError.
  mapper = TestMapper.new

  assert_raise(RuntimeError) { mapper.mapped_line(['A'], invalid_standard_mapping) }
end
499
+
500
test 'should join blank first field with compacting' do
  # The blank first field is expected to be dropped, leaving no
  # leading separator in the joined address.
  incoming = ['', 'CB3 0DS']
  joined = TestMapper.new.mapped_line(incoming, joined_mapping_blank_start)

  assert_equal 'CB3 0DS', joined['address']
end
504
+
505
test 'should join blank first field without compacting' do
  # The blank first field is expected to be kept, so the joined
  # address starts with the separator.
  incoming = ['', 'CB3 0DS']
  joined = TestMapper.new.mapped_line(incoming, joined_mapping_blank_start_uncompacted)

  assert_equal ',CB3 0DS', joined['address']
end
509
+
510
test 'line mapping should map date formats correctly' do
  # Five different renderings of 6 July 1927, one per mapped column,
  # must all convert to the same Date.
  expected_date = Date.new(1927, 7, 6)
  raw_dates = ['06/07/1927', '19270706', '07/06/1927', '06/07/27', '06/JUL/27']
  mapped = TestMapper.new.mapped_line(raw_dates, date_mapping)

  %w[dateofbirth receiveddate americandate shortdate funkydate].each do |field|
    assert_equal expected_date, mapped[field].to_date
  end
end
520
+
521
test 'should ignore columns marked do not capture' do
  # The 'ignore_me' column must not appear in the raw text.
  raw = TestMapper.new.mapped_line(['rubbish'], do_not_capture_column)[:rawtext]

  refute_includes raw, 'ignore_me'
end
525
+
526
test 'should decode base64 encoded word document' do
  # The .doc fixture is Base64-encoded and passed in as a single
  # column; the raw text is expected to hold the document's plain text.
  doc_path = @permanent_test_files.join('hello_world.doc')
  payload = Base64.encode64(File.binread(doc_path))

  raw = TestMapper.new.mapped_line([payload], base64_mapping)[:rawtext]

  assert_equal 'Hello world, this is a word document', raw['base64']
end
532
+
533
test 'should decode word.doc' do
  # decode_raw_value is invoked via send with the :word_doc encoding
  # on the fixture's raw bytes.
  doc_bytes = File.binread(@permanent_test_files.join('hello_world.doc'))

  decoded = TestMapper.new.send(:decode_raw_value, doc_bytes, :word_doc)

  assert_equal 'Hello world, this is a word document', decoded
end
539
+
540
test 'should read word.doc stream' do
  # read_word_stream is invoked via send with an open IO on the
  # .doc fixture and is expected to return the document's plain text.
  test_file = @permanent_test_files.join('hello_world.doc')

  # Block form of File.open closes the handle even if extraction
  # raises; the original passed an inline File.open result that was
  # never closed.
  file_content = File.open(test_file, 'r') do |stream|
    TestMapper.new.send(:read_word_stream, stream)
  end

  assert_equal 'Hello world, this is a word document', file_content
end
545
+
546
test 'should raise unknown encoding exception' do
  # Mapping with the invalid_decode_mapping fixture is expected to
  # raise a RuntimeError.
  mapper = TestMapper.new

  assert_raise(RuntimeError) { mapper.mapped_line(['A'], invalid_decode_mapping) }
end
551
+ end