ndr_import 9.0.0 → 10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 500566629ada1bfbab0def117060fec471bf2fab2c320a112b05a97a8474ee95
4
- data.tar.gz: 572dc475c3704f2f749e9de11e031829e593d1e2a6fa24a6755342f2f9bdbaae
3
+ metadata.gz: b98a9642fed703edb02ce4bc18d5c15869f1dd10d0e072866a84a0b6b9529141
4
+ data.tar.gz: 8c4aa215b0e87ca31676a96c703789bfb93d22bf3fa32b44ee7169a4ccfa4607
5
5
  SHA512:
6
- metadata.gz: 8c8491043c7e58d0ca4953c58ff12914c1dd8deb587c67a9cbd488e8420b26f9e8ea55f7b23cbea68bf02956c3215602932dee5ffd23506d4017ac997378b835
7
- data.tar.gz: 03c24de311a201a871f752773f0d43c38e2c9e49d31585759c0b009cc76adbf6668350d0bc7c6c2746f9b84998bd384055b3fa47806fc242a3fa49fdb3ed4851
6
+ metadata.gz: da1b4ae10264ac9a5ff7d09832c979f81608c6e428052bbe1dc403f5d5cc0d9c1f44348b59d02be340cc48a277cf2bfe84f5fd80560f7ebc3b8379b529f65a4f
7
+ data.tar.gz: b969c50b4aec9687571c53f1b49b6798ff65ef2c1d4edeb36ab535e8af59a2387fe8d4a5941ef9c1257686e59c990b30de69a1423779dd8bf77ccfc94bd04786
@@ -0,0 +1,2 @@
1
+ # Admins should have oversight of the version:
2
+ lib/ndr_import/version.rb @publichealthengland/ndr-admins
@@ -0,0 +1,23 @@
1
+ name: Lint
2
+
3
+ on: [pull_request]
4
+
5
+ jobs:
6
+ rubocop:
7
+ name: RuboCop
8
+
9
+ runs-on: ubuntu-latest
10
+
11
+ steps:
12
+ - uses: actions/checkout@v2
13
+ with:
14
+ fetch-depth: 0 # fetch everything
15
+ - name: Set up Ruby
16
+ uses: ruby/setup-ruby@v1
17
+ with:
18
+ ruby-version: 3.0
19
+ - name: Install dependencies
20
+ run: bundle install
21
+ - name: Run RuboCop against BASE..HEAD changes
22
+ run: bundle exec rake rubocop:diff origin/${GITHUB_BASE_REF#*/}
23
+
@@ -0,0 +1,72 @@
1
+ name: Test
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ test:
7
+ strategy:
8
+ fail-fast: false
9
+ matrix:
10
+ ruby-version:
11
+ - 2.6
12
+ - 2.7
13
+ - 3.0
14
+ gemfile:
15
+ - gemfiles/Gemfile.rails52
16
+ - gemfiles/Gemfile.rails60
17
+
18
+ name: Ruby ${{ matrix.ruby-version }} / Bundle ${{ matrix.gemfile }}
19
+
20
+ runs-on: ubuntu-latest
21
+
22
+ env:
23
+ BUNDLE_GEMFILE: ${{ matrix.gemfile }}
24
+
25
+ steps:
26
+ - uses: actions/checkout@v2
27
+ - name: Set up Ruby
28
+ uses: ruby/setup-ruby@v1
29
+ with:
30
+ ruby-version: ${{ matrix.ruby-version }}
31
+ - name: Install dependencies
32
+ run: bundle install
33
+ - name: Run tests
34
+ run: bundle exec rake
35
+
36
+ # A utility job upon which Branch Protection can depend,
37
+ # thus remaining agnostic of the matrix.
38
+ test_matrix:
39
+ if: ${{ always() }}
40
+ runs-on: ubuntu-latest
41
+ name: Matrix
42
+ needs: test
43
+ steps:
44
+ - name: Check build matrix status
45
+ if: ${{ needs.test.result != 'success' }}
46
+ run: exit 1
47
+
48
+ notify:
49
+ # Run only on master, but regardless of whether tests pass:
50
+ if: ${{ always() && github.ref == 'refs/heads/master' }}
51
+
52
+ needs: test_matrix
53
+
54
+ runs-on: ubuntu-latest
55
+
56
+ steps:
57
+ - uses: 8398a7/action-slack@v3
58
+ with:
59
+ status: custom
60
+ fields: workflow,commit,author
61
+ custom_payload: |
62
+ {
63
+ channel: 'C7FQWGDHP',
64
+ username: 'CI – ' + '${{ github.repository }}'.split('/')[1],
65
+ icon_emoji: ':hammer_and_wrench:',
66
+ attachments: [{
67
+ color: '${{ needs.test_matrix.result }}' === 'success' ? 'good' : '${{ needs.test_matrix.result }}' === 'failure' ? 'danger' : 'warning',
68
+ text: `${process.env.AS_WORKFLOW} against \`${{ github.ref }}\` (${process.env.AS_COMMIT}) for ${{ github.actor }} resulted in *${{ needs.test_matrix.result }}*.`
69
+ }]
70
+ }
71
+ env:
72
+ SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
data/.gitignore CHANGED
@@ -5,7 +5,7 @@
5
5
  /gemfiles/Gemfile.*.lock
6
6
  /_yardoc/
7
7
  /coverage/
8
- /doc/
8
+ /docs/_site/
9
9
  /pkg/
10
10
  /spec/reports/
11
11
  /tmp/
data/CHANGELOG.md CHANGED
@@ -1,6 +1,37 @@
1
1
  ## [Unreleased]
2
2
  *no unreleased changes*
3
3
 
4
+ ## 10.0 / 2021-02-22
5
+ ### Changed
6
+ * By default, escape any control characters found in XML (#60)
7
+
8
+ ## 9.1.0 / 2021-02-01
9
+ ### Added
10
+ * `CSVLibrary` is now deprecated.
11
+ * Handle xlsm files
12
+
13
+ ### Fixed
14
+ * Fix `CSVLibrary.foreach` on Ruby 3.0
15
+ * Updated jekyll bundle
16
+
17
+ ## 9.0.3 / 2021-01-04
18
+ ### Fixed
19
+ * Address issue importing multiple files against the same table (#54)
20
+
21
+ ### Changed
22
+ * ensure keyword args are properly splatted for ruby 2.7
23
+
24
+ ### Added
25
+ * Ruby 2.7 to travis matrix
26
+
27
+ ## 9.0.2 / 2020-08-14
28
+ ### Changed
29
+ * Configure Nokogiri with HUGE for large xml files
30
+
31
+ ## 9.0.1 / 2020-03-26
32
+ ### Fixed
33
+ * bumps to `nokogiri` / `spreadsheet` / `rubyzip` dependencies
34
+
4
35
  ## 9.0.0 / 2019-07-31
5
36
  ### Changed
6
37
  * `File::Xml` will now stream XML files by default. Use `slurp: true` for the old behaviour. (#43)
data/README.md CHANGED
@@ -1,5 +1,4 @@
1
- # NdrImport [![Build Status](https://travis-ci.org/PublicHealthEngland/ndr_import.svg?branch=master)](https://travis-ci.org/PublicHealthEngland/ndr_import) [![Gem Version](https://badge.fury.io/rb/ndr_import.svg)](https://badge.fury.io/rb/ndr_import)
2
-
1
+ # NdrImport [![Build Status](https://github.com/publichealthengland/ndr_import/workflows/Test/badge.svg)](https://github.com/publichealthengland/ndr_import/actions?query=workflow%3Atest) [![Gem Version](https://badge.fury.io/rb/ndr_import.svg)](https://rubygems.org/gems/ndr_import) [![Documentation](https://img.shields.io/badge/ndr_import-docs-blue.svg)](https://www.rubydoc.info/gems/ndr_import)
3
2
  This is the Public Health England (PHE) National Disease Registers (NDR) Import ETL ruby gem, providing:
4
3
 
5
4
  1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, PDF AcroForms, XML, 7-Zip and Zip files.
data/code_safety.yml CHANGED
@@ -1,9 +1,21 @@
1
1
  ---
2
2
  file safety:
3
+ ".github/CODEOWNERS":
4
+ comments:
5
+ reviewed_by: ollietulloch
6
+ safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
7
+ ".github/workflows/lint.yml":
8
+ comments:
9
+ reviewed_by: ollietulloch
10
+ safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
11
+ ".github/workflows/test.yml":
12
+ comments:
13
+ reviewed_by: ollietulloch
14
+ safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
3
15
  ".gitignore":
4
16
  comments: whole file re-reviewed
5
17
  reviewed_by: josh.pencheon
6
- safe_revision: 3ef51291c413fd5772d61a8394359146a02ae628
18
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
7
19
  ".hound.yml":
8
20
  comments:
9
21
  reviewed_by: timgentry
@@ -12,14 +24,10 @@ file safety:
12
24
  comments:
13
25
  reviewed_by: josh.pencheon
14
26
  safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
15
- ".travis.yml":
16
- comments:
17
- reviewed_by: josh.pencheon
18
- safe_revision: d3d9a987befeecb122a448d8d06e66d74da13fb5
19
27
  CHANGELOG.md:
20
28
  comments:
21
- reviewed_by: josh.pencheon
22
- safe_revision: 0a4e05d45ee65e25edc36de84d3c450cc15dc3ed
29
+ reviewed_by: joshpencheon
30
+ safe_revision: 8ba7aae5e4839bed03ddc6837dd657ef7720e8ce
23
31
  CODE_OF_CONDUCT.md:
24
32
  comments:
25
33
  reviewed_by: timgentry
@@ -38,8 +46,8 @@ file safety:
38
46
  safe_revision: 5d185a0aeba6a9cd2ff5e59efadcaeec9be45d8b
39
47
  README.md:
40
48
  comments:
41
- reviewed_by: josh.pencheon
42
- safe_revision: e1d967c10059e8c635452838c3f3dd2b969d9ae4
49
+ reviewed_by: ollietulloch
50
+ safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
43
51
  Rakefile:
44
52
  comments:
45
53
  reviewed_by: josh.pencheon
@@ -52,6 +60,70 @@ file safety:
52
60
  comments:
53
61
  reviewed_by: josh.pencheon
54
62
  safe_revision: e1d967c10059e8c635452838c3f3dd2b969d9ae4
63
+ docs/Gemfile:
64
+ comments:
65
+ reviewed_by: josh.pencheon
66
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
67
+ docs/Gemfile.lock:
68
+ comments:
69
+ reviewed_by: ollietulloch
70
+ safe_revision: ea0149c7739676463a252ffd9fbe4af238762b2b
71
+ docs/_config.yml:
72
+ comments:
73
+ reviewed_by: josh.pencheon
74
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
75
+ docs/_includes/footer.html:
76
+ comments:
77
+ reviewed_by: josh.pencheon
78
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
79
+ docs/_includes/header.html:
80
+ comments:
81
+ reviewed_by: josh.pencheon
82
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
83
+ docs/capturing-data.md:
84
+ comments:
85
+ reviewed_by: josh.pencheon
86
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
87
+ docs/date-formats.md:
88
+ comments:
89
+ reviewed_by: josh.pencheon
90
+ safe_revision: fa21d6d967bf132800b456b585795beec80b08a3
91
+ docs/getting-started.md:
92
+ comments:
93
+ reviewed_by: josh.pencheon
94
+ safe_revision: fa21d6d967bf132800b456b585795beec80b08a3
95
+ docs/identifying-and-splitting-records.md:
96
+ comments:
97
+ reviewed_by: josh.pencheon
98
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
99
+ docs/inbuilt-cleaning-methods.md:
100
+ comments:
101
+ reviewed_by: josh.pencheon
102
+ safe_revision: 694b57ce14e0709fc4d31a1357f8416e98f5de91
103
+ docs/index.md:
104
+ comments:
105
+ reviewed_by: josh.pencheon
106
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
107
+ docs/local-code-transformation-in-yaml-mappings.md:
108
+ comments:
109
+ reviewed_by: josh.pencheon
110
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
111
+ docs/non-tabular-mappings.md:
112
+ comments:
113
+ reviewed_by: josh.pencheon
114
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
115
+ docs/priority-field-mapping.md:
116
+ comments:
117
+ reviewed_by: josh.pencheon
118
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
119
+ docs/standard-yaml-mappings.md:
120
+ comments:
121
+ reviewed_by: josh.pencheon
122
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
123
+ docs/yaml-mapping-user-guide.md:
124
+ comments:
125
+ reviewed_by: josh.pencheon
126
+ safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
55
127
  exe/pdf_acro_form_to_yaml:
56
128
  comments:
57
129
  reviewed_by: josh.pencheon
@@ -82,8 +154,8 @@ file safety:
82
154
  safe_revision: 24d6449fd0612552f132dfbf4cada2ae28d0469e
83
155
  lib/ndr_import/csv_library.rb:
84
156
  comments:
85
- reviewed_by: josh.pencheon
86
- safe_revision: be12e57519d3737e8d3901d7b01485c6995708dd
157
+ reviewed_by: ollietulloch
158
+ safe_revision: 6b8668967dbd42d7893a0fa5f0aa1ec1c11227e1
87
159
  lib/ndr_import/file/acro_form.rb:
88
160
  comments:
89
161
  reviewed_by: josh.pencheon
@@ -106,8 +178,8 @@ file safety:
106
178
  safe_revision: 897f8b648d633368cf2001d17ab89c06a12d445b
107
179
  lib/ndr_import/file/excel.rb:
108
180
  comments:
109
- reviewed_by: josh.pencheon
110
- safe_revision: c3183e522bce50008df576ceb47fe4761ab8f966
181
+ reviewed_by: ollietulloch
182
+ safe_revision: 37482c79448bea80033f6f69d97584df330c9861
111
183
  lib/ndr_import/file/office_file_helper.rb:
112
184
  comments:
113
185
  reviewed_by: josh.pencheon
@@ -150,8 +222,8 @@ file safety:
150
222
  safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
151
223
  lib/ndr_import/helpers/file/delimited.rb:
152
224
  comments:
153
- reviewed_by: josh.pencheon
154
- safe_revision: 607c0668f1fffd70d181bc1a31c4f56eed5f6189
225
+ reviewed_by: ollietulloch
226
+ safe_revision: 4a5cc1d362c632fc1f9242c69982fbce33557e17
155
227
  lib/ndr_import/helpers/file/excel.rb:
156
228
  comments:
157
229
  reviewed_by: joshpencheon
@@ -166,13 +238,13 @@ file safety:
166
238
  safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
167
239
  lib/ndr_import/helpers/file/xml.rb:
168
240
  comments:
169
- reviewed_by: josh.pencheon
170
- safe_revision: d2245268ec6a0e4f60c521d171a820f299632c4f
241
+ reviewed_by: joshpencheon
242
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
171
243
  lib/ndr_import/helpers/file/xml_streaming.rb:
172
244
  comments: uses SafePath and Shellwords when accessing filesystem, or making system
173
245
  calls
174
246
  reviewed_by: josh.pencheon
175
- safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
247
+ safe_revision: 55e502bb4445cb8b985e530e8eb26d92b574ded9
176
248
  lib/ndr_import/helpers/file/zip.rb:
177
249
  comments:
178
250
  reviewed_by: timgentry
@@ -227,16 +299,16 @@ file safety:
227
299
  safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
228
300
  lib/ndr_import/universal_importer_helper.rb:
229
301
  comments:
230
- reviewed_by: josh.pencheon
231
- safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
302
+ reviewed_by: ollietulloch
303
+ safe_revision: ee2e74e4ceda4ff48cbda6872a6bdf0874212c21
232
304
  lib/ndr_import/unmapped_data_error.rb:
233
305
  comments:
234
306
  reviewed_by: josh.pencheon
235
307
  safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
236
308
  lib/ndr_import/version.rb:
237
309
  comments: another check?
238
- reviewed_by: josh.pencheon
239
- safe_revision: 0a4e05d45ee65e25edc36de84d3c450cc15dc3ed
310
+ reviewed_by: joshpencheon
311
+ safe_revision: 8ba7aae5e4839bed03ddc6837dd657ef7720e8ce
240
312
  lib/ndr_import/xml/table.rb:
241
313
  comments:
242
314
  reviewed_by: josh.pencheon
@@ -244,7 +316,11 @@ file safety:
244
316
  ndr_import.gemspec:
245
317
  comments:
246
318
  reviewed_by: josh.pencheon
247
- safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
319
+ safe_revision: 95e6ee9997d06471fe6f2f169c3c701471086371
320
+ test/csv_library_test.rb:
321
+ comments:
322
+ reviewed_by: ollietulloch
323
+ safe_revision: 6b8668967dbd42d7893a0fa5f0aa1ec1c11227e1
248
324
  test/file/acro_form_test.rb:
249
325
  comments:
250
326
  reviewed_by: josh.pencheon
@@ -263,16 +339,16 @@ file safety:
263
339
  safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
264
340
  test/file/excel_test.rb:
265
341
  comments:
266
- reviewed_by: josh.pencheon
267
- safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
342
+ reviewed_by: ollietulloch
343
+ safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
268
344
  test/file/pdf_test.rb:
269
345
  comments:
270
346
  reviewed_by: josh.pencheon
271
347
  safe_revision: cb24ed3ea8116730d07f74546cd6fed0738b171d
272
348
  test/file/registry_test.rb:
273
349
  comments:
274
- reviewed_by: josh.pencheon
275
- safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
350
+ reviewed_by: ollietulloch
351
+ safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
276
352
  test/file/seven_zip_test.rb:
277
353
  comments:
278
354
  reviewed_by: josh.pencheon
@@ -319,8 +395,8 @@ file safety:
319
395
  safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
320
396
  test/helpers/file/xml_test.rb:
321
397
  comments:
322
- reviewed_by: timgentry
323
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
398
+ reviewed_by: joshpencheon
399
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
324
400
  test/helpers/file/zip_test.rb:
325
401
  comments:
326
402
  reviewed_by: josh.pencheon
@@ -431,8 +507,8 @@ file safety:
431
507
  safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
432
508
  test/resources/malformed.xml:
433
509
  comments:
434
- reviewed_by: timgentry
435
- safe_revision: 137170d443ea6bcc0afb18f62202c285ae6501eb
510
+ reviewed_by: joshpencheon
511
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
436
512
  test/resources/malformed_pipe.csv:
437
513
  comments:
438
514
  reviewed_by: josh.pencheon
@@ -489,6 +565,10 @@ file safety:
489
565
  comments:
490
566
  reviewed_by: timgentry
491
567
  safe_revision: 8c30f89f0562ab120769c166d4e93ff839c055f7
568
+ test/resources/sample_xlsm.xlsm:
569
+ comments:
570
+ reviewed_by: ollietulloch
571
+ safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
492
572
  test/resources/sample_xlsx.xlsx:
493
573
  comments:
494
574
  reviewed_by: timgentry
@@ -505,6 +585,10 @@ file safety:
505
585
  comments:
506
586
  reviewed_by: timgentry
507
587
  safe_revision: 31fb1935f4578729d8786eea41cf0ce0a19be1cd
588
+ test/resources/two_files_single_table_mapping.zip:
589
+ comments:
590
+ reviewed_by: ollietulloch
591
+ safe_revision: 830de0f8cb139c5f61525652b424423935cfc7ac
508
592
  test/resources/txt_file_xls_extension.xls:
509
593
  comments:
510
594
  reviewed_by: timgentry
@@ -537,6 +621,10 @@ file safety:
537
621
  comments:
538
622
  reviewed_by: timgentry
539
623
  safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
624
+ test/resources/with-control-chars.xml:
625
+ comments:
626
+ reviewed_by: joshpencheon
627
+ safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
540
628
  test/resources/xlsx_file_xls_extension.xls:
541
629
  comments:
542
630
  reviewed_by: timgentry
@@ -555,8 +643,8 @@ file safety:
555
643
  safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
556
644
  test/universal_importer_helper_test.rb:
557
645
  comments:
558
- reviewed_by: josh.pencheon
559
- safe_revision: c3183e522bce50008df576ceb47fe4761ab8f966
646
+ reviewed_by: ollietulloch
647
+ safe_revision: 830de0f8cb139c5f61525652b424423935cfc7ac
560
648
  test/xml/table_test.rb:
561
649
  comments:
562
650
  reviewed_by: josh.pencheon
@@ -1,38 +1,60 @@
1
1
  # This file allows us to choose the CSV library we want to use.
2
2
 
3
3
  require 'csv'
4
+ require 'active_support/deprecation'
5
+
4
6
  # Using relevant core CSV library.
5
- CSVLibrary = CSV
7
+ class CSVLibrary < CSV; end
6
8
 
7
9
  class << CSVLibrary
8
10
  # Is the library we're using FasterCSV?
9
11
  def fastercsv?
12
+ deprecate('if you desparately want fastercsv, please use it explicitly')
10
13
  not self.const_defined?(:Reader)
11
14
  end
12
15
 
13
16
  # Ensure that we can pass "mode" straight through the underlying IO object
17
+ #
18
+ # Note: this could likely be refactored now, as upstream support for something
19
+ # very similar was added:
20
+ #
21
+ # https://github.com/ruby/csv/commit/b4edaf2cf1aa36f5c6264c07514b66739b87ceee
22
+ #
14
23
  def foreach(path, **options, &block)
15
- return to_enum(__method__, path, options) unless block
16
- open(path, options.delete(:mode) || 'r', options) do |csv|
24
+ deprecate('CSV#foreach exists, with an optional `mode` argument')
25
+ return to_enum(__method__, path, **options) unless block
26
+ open(path, options.delete(:mode) || 'r', **options) do |csv|
17
27
  csv.each(&block)
18
28
  end
19
29
  end
20
30
 
21
31
  def write_csv_to_string(data)
32
+ deprecate('write_csv_to_string -> generate')
22
33
  self.generate do |csv|
23
34
  data.each { |line| csv << line }
24
35
  end
25
36
  end
26
37
 
27
38
  def write_csv_to_file(data, filepath, mode = 'w')
39
+ deprecate('write_csv_to_file -> open')
28
40
  self.open(filepath, mode) do |csv|
29
41
  data.each { |line| csv << line }
30
42
  end
31
43
  end
32
44
 
33
45
  def read_csv_from_file(filepath)
46
+ deprecate('read_csv_from_file -> read')
34
47
  self.read(filepath)
35
48
  end
49
+
50
+ private
51
+
52
+ def deprecate(additional_message = nil)
53
+ ActiveSupport::Deprecation.warn(<<~MESSAGE)
54
+ CSVLibrary is deprecated, and will be removed in a future version of ndr_import.
55
+ Please use standard functionality provided by Ruby's CSV library (#{additional_message}).
56
+ MESSAGE
57
+ end
36
58
  end
37
59
 
38
60
  # Forward port CSV::Cell, as it is sometimes
@@ -90,14 +90,14 @@ module NdrImport
90
90
  case SafeFile.extname(path).downcase
91
91
  when '.xls'
92
92
  Roo::Excel.new(SafeFile.safepath_to_string(path))
93
- when '.xlsx'
93
+ when '.xlsm', '.xlsx'
94
94
  if @options['file_password']
95
95
  Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password'])))
96
96
  else
97
97
  Roo::Excelx.new(SafeFile.safepath_to_string(path))
98
98
  end
99
99
  else
100
- fail "Received file path with unexpected extension #{SafeFile.extname(path)}"
100
+ raise "Received file path with unexpected extension #{SafeFile.extname(path)}"
101
101
  end
102
102
  rescue Ole::Storage::FormatError => e
103
103
  # TODO: Do we need to remove the new_file after using it?
@@ -105,16 +105,14 @@ module NdrImport
105
105
  # try to load the .xls file as an .xlsx file, useful for sources like USOM
106
106
  # roo checks file extensions in file_type_check (GenericSpreadsheet),
107
107
  # so we create a duplicate file in xlsx extension
108
- if /(.*)\.xls$/.match(path)
109
- new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
110
- new_file_path = SafeFile.dirname(path).join(new_file_name)
111
- copy_file(path, new_file_path)
108
+ raise e.message unless /(.*)\.xls$/.match(path)
112
109
 
113
- load_workbook(new_file_path)
114
- else
115
- raise e.message
116
- end
117
- rescue => e
110
+ new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
111
+ new_file_path = SafeFile.dirname(path).join(new_file_name)
112
+ copy_file(path, new_file_path)
113
+
114
+ load_workbook(new_file_path)
115
+ rescue RuntimeError, ::Zip::Error => e
118
116
  raise ["Unable to read the file '#{path}'", e.message].join('; ')
119
117
  end
120
118
 
@@ -133,6 +131,6 @@ module NdrImport
133
131
  end
134
132
  end
135
133
 
136
- Registry.register(Excel, 'xls', 'xlsx')
134
+ Registry.register(Excel, 'xls', 'xlsm', 'xlsx')
137
135
  end
138
136
  end
@@ -32,11 +32,11 @@ module NdrImport
32
32
  return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
33
33
 
34
34
  safe_path = SafeFile.safepath_to_string(path)
35
- encodings = determine_encodings!(safe_path, col_sep, liberal)
35
+ options = determine_encodings!(safe_path, col_sep, liberal)
36
36
 
37
- # By now, we know `encodings` should let us read the whole
37
+ # By now, we know `options` should let us read the whole
38
38
  # file successfully; if there are problems, we should crash.
39
- CSVLibrary.foreach(safe_path, encodings) do |line|
39
+ CSV.foreach(safe_path, options.delete(:mode), **options) do |line|
40
40
  yield line.map(&:to_s)
41
41
  end
42
42
  end
@@ -46,7 +46,7 @@ module NdrImport
46
46
  # Derive the source encoding by trying all supported encodings.
47
47
  # Returns first set of working options, or raises if none could be found.
48
48
  def determine_encodings!(safe_path, col_sep, liberal)
49
- # delimiter encoding => # FasterCSV encoding string
49
+ # delimiter encoding => # CSV encoding string
50
50
  supported_encodings = {
51
51
  'UTF-8' => 'r:bom|utf-8',
52
52
  'Windows-1252' => 'r:windows-1252:utf-8'
@@ -67,14 +67,13 @@ module NdrImport
67
67
  begin
68
68
  options = {
69
69
  col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
70
- liberal_parsing: liberal,
71
- mode: access_mode
70
+ liberal_parsing: liberal
72
71
  }
73
72
 
74
73
  row_num = 0
75
74
  # Iterate through the file; if we reach the end, this encoding worked:
76
- CSVLibrary.foreach(safe_path, options) { |_line| row_num += 1 }
77
- return options
75
+ CSV.foreach(safe_path, access_mode, **options) { |_line| row_num += 1 }
76
+ return options.merge(mode: access_mode)
78
77
  rescue ArgumentError => e
79
78
  next if e.message =~ /invalid byte sequence/ # This encoding didn't work
80
79
  raise(e)
@@ -10,15 +10,20 @@ module NdrImport
10
10
 
11
11
  private
12
12
 
13
- def read_xml_file(path)
14
- file_data = SafeFile.new(path).read
13
+ # By default, escapes any control characters found in the XML
14
+ # - their use is forbidden in XML 1.0, and highly discouraged
15
+ # in XML 1.1; any found are most likely to be erroneous.
16
+ def read_xml_file(path, preserve_control_chars: false)
17
+ file_data = ensure_utf8!(SafeFile.read(path))
18
+ escape_xml_control_chars!(file_data) unless preserve_control_chars
15
19
 
16
20
  require 'nokogiri'
17
21
 
18
- Nokogiri::XML(ensure_utf8! file_data).tap do |doc|
19
- doc.encoding = 'UTF-8'
20
- emulate_strict_mode_fatal_check!(doc)
21
- end
22
+ doc = Nokogiri::XML(file_data, &:huge)
23
+ doc.encoding = 'UTF-8'
24
+ emulate_strict_mode_fatal_check!(doc)
25
+
26
+ doc
22
27
  end
23
28
 
24
29
  # Nokogiri can use give a `STRICT` parse option to libxml, but our friendly
@@ -37,11 +42,19 @@ module NdrImport
37
42
  end
38
43
 
39
44
  return unless fatal_errors.any?
45
+
40
46
  raise Nokogiri::XML::SyntaxError, <<~MSG
41
47
  The file had #{fatal_errors.length} fatal error(s)!"
42
48
  #{fatal_errors.join("\n")}
43
49
  MSG
44
50
  end
51
+
52
+ # In place, escape out any control chars that would cause
53
+ # libxml to crash. Very few are allowable in XML 1.0, and
54
+ # remain heavily discouraged in XML 1.1.
55
+ def escape_xml_control_chars!(data)
56
+ escape_control_chars!(data)
57
+ end
45
58
  end
46
59
  end
47
60
  end
@@ -1,3 +1,5 @@
1
+ require 'shellwords'
2
+
1
3
  require 'ndr_support/safe_file'
2
4
  require 'ndr_support/utf8_encoding'
3
5
 
@@ -1,3 +1,5 @@
1
+ require 'shellwords'
2
+
1
3
  require 'ndr_import/file/registry'
2
4
 
3
5
  module NdrImport
@@ -5,11 +7,30 @@ module NdrImport
5
7
  # complexity of enumerating over files and tables (which should be universally useful).
6
8
  # It is assumed that the host module/class defines `unzip_path`.
7
9
  module UniversalImporterHelper
10
+ # Helper class to allow multiple source enumerators to contribute to one overall table.
11
+ class TableEnumProxy
12
+ include Enumerable
13
+
14
+ def initialize
15
+ @table_enums = []
16
+ end
17
+
18
+ def add_table_enum(table_enum)
19
+ @table_enums << table_enum
20
+ end
21
+
22
+ def each(&block)
23
+ return enum_for(:each) unless block
24
+
25
+ @table_enums.each { |table_enum| table_enum.each(&block) }
26
+ end
27
+ end
28
+
8
29
  def table_enumerators(filename)
9
- table_enumerators = {}
30
+ table_enumerators = Hash.new { |hash, key| hash[key] = TableEnumProxy.new }
10
31
 
11
32
  extract(filename).each do |table, rows|
12
- table_enumerators[table.canonical_name] = table.transform(rows)
33
+ table_enumerators[table.canonical_name].add_table_enum table.transform(rows)
13
34
  end
14
35
 
15
36
  table_enumerators
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '9.0.0'.freeze
4
+ VERSION = '10.0'
5
5
  end
data/ndr_import.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
  # Specify which files should be added to the gem when it is released.
16
16
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
17
17
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
18
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(docs|test|spec|features)/}) }
19
19
  end
20
20
  spec.files -= %w[.travis.yml] # Not needed in the gem
21
21
  spec.bindir = 'exe'
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.add_dependency 'activesupport', '>= 5.0', '< 7'
27
27
  spec.add_dependency 'ndr_support', '>= 5.3.2', '< 6'
28
28
 
29
- spec.add_dependency 'rubyzip', '~> 1.2', '>= 1.2.2'
29
+ spec.add_dependency 'rubyzip', '~> 2.0'
30
30
  spec.add_dependency 'roo', '~> 2.0'
31
31
 
32
32
  spec.add_dependency 'docx', '~> 0.3'
@@ -36,12 +36,12 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency 'pdf-reader', '~> 2.1'
37
37
  spec.add_dependency 'roo-xls'
38
38
  spec.add_dependency 'seven_zip_ruby', '~> 1.2'
39
- spec.add_dependency 'spreadsheet', '1.0.3'
39
+ spec.add_dependency 'spreadsheet', '1.2.6'
40
40
 
41
41
  spec.required_ruby_version = '>= 2.5'
42
42
 
43
43
  spec.add_development_dependency 'bundler'
44
- spec.add_development_dependency 'rake', '~> 10.0'
44
+ spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.3'
45
45
  spec.add_development_dependency 'minitest'
46
46
  spec.add_development_dependency 'mocha'
47
47
  spec.add_development_dependency 'ndr_dev_support', '>= 3.1.3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.0.0
4
+ version: '10.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-31 00:00:00.000000000 Z
11
+ date: 2021-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activemodel
@@ -70,20 +70,14 @@ dependencies:
70
70
  requirements:
71
71
  - - "~>"
72
72
  - !ruby/object:Gem::Version
73
- version: '1.2'
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- version: 1.2.2
73
+ version: '2.0'
77
74
  type: :runtime
78
75
  prerelease: false
79
76
  version_requirements: !ruby/object:Gem::Requirement
80
77
  requirements:
81
78
  - - "~>"
82
79
  - !ruby/object:Gem::Version
83
- version: '1.2'
84
- - - ">="
85
- - !ruby/object:Gem::Version
86
- version: 1.2.2
80
+ version: '2.0'
87
81
  - !ruby/object:Gem::Dependency
88
82
  name: roo
89
83
  requirement: !ruby/object:Gem::Requirement
@@ -208,14 +202,14 @@ dependencies:
208
202
  requirements:
209
203
  - - '='
210
204
  - !ruby/object:Gem::Version
211
- version: 1.0.3
205
+ version: 1.2.6
212
206
  type: :runtime
213
207
  prerelease: false
214
208
  version_requirements: !ruby/object:Gem::Requirement
215
209
  requirements:
216
210
  - - '='
217
211
  - !ruby/object:Gem::Version
218
- version: 1.0.3
212
+ version: 1.2.6
219
213
  - !ruby/object:Gem::Dependency
220
214
  name: bundler
221
215
  requirement: !ruby/object:Gem::Requirement
@@ -236,14 +230,20 @@ dependencies:
236
230
  requirements:
237
231
  - - "~>"
238
232
  - !ruby/object:Gem::Version
239
- version: '10.0'
233
+ version: '12.3'
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: 12.3.3
240
237
  type: :development
241
238
  prerelease: false
242
239
  version_requirements: !ruby/object:Gem::Requirement
243
240
  requirements:
244
241
  - - "~>"
245
242
  - !ruby/object:Gem::Version
246
- version: '10.0'
243
+ version: '12.3'
244
+ - - ">="
245
+ - !ruby/object:Gem::Version
246
+ version: 12.3.3
247
247
  - !ruby/object:Gem::Dependency
248
248
  name: minitest
249
249
  requirement: !ruby/object:Gem::Requirement
@@ -365,6 +365,9 @@ executables:
365
365
  extensions: []
366
366
  extra_rdoc_files: []
367
367
  files:
368
+ - ".github/CODEOWNERS"
369
+ - ".github/workflows/lint.yml"
370
+ - ".github/workflows/test.yml"
368
371
  - ".gitignore"
369
372
  - ".hound.yml"
370
373
  - ".rubocop.yml"