ndr_import 9.0.1 → 10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +2 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/test.yml +72 -0
- data/CHANGELOG.md +31 -1
- data/README.md +1 -2
- data/code_safety.yml +62 -38
- data/lib/ndr_import/csv_library.rb +25 -3
- data/lib/ndr_import/file/excel.rb +10 -12
- data/lib/ndr_import/helpers/file/delimited.rb +7 -8
- data/lib/ndr_import/helpers/file/xml.rb +19 -6
- data/lib/ndr_import/non_tabular/table.rb +1 -1
- data/lib/ndr_import/table.rb +26 -3
- data/lib/ndr_import/universal_importer_helper.rb +22 -5
- data/lib/ndr_import/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 16cdebb2e3a6809255d5d66971a2db6d5c954731f6def3c73fe0f89d8ea0b7e9
|
4
|
+
data.tar.gz: 575ba2c40ae01f99ebd48f75ff42c2cdf40871cbed87bfb2399de45d9f402e73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37e9bcfe3b8a5cab98bd68fc846eafd386595e0b6b79b9b4bf10ba8bc44a6d24ff998f5ab249964009bb0eebebfe0700bd6a39ddd0dfefc1260b529ff5543e63
|
7
|
+
data.tar.gz: c144108ad4d2f63c918e43ef540f30d49be26897d6c4d82bccda6128aaff5b408c3d93425612bedaef73f865292a3d2a408f0c792e35a9f548e50363804c8d10
|
data/.github/CODEOWNERS
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
name: Lint
|
2
|
+
|
3
|
+
on: [pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
rubocop:
|
7
|
+
name: RuboCop
|
8
|
+
|
9
|
+
runs-on: ubuntu-latest
|
10
|
+
|
11
|
+
steps:
|
12
|
+
- uses: actions/checkout@v2
|
13
|
+
with:
|
14
|
+
fetch-depth: 0 # fetch everything
|
15
|
+
- name: Set up Ruby
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
|
+
with:
|
18
|
+
ruby-version: 3.0
|
19
|
+
- name: Install dependencies
|
20
|
+
run: bundle install
|
21
|
+
- name: Run RuboCop against BASE..HEAD changes
|
22
|
+
run: bundle exec rake rubocop:diff origin/${GITHUB_BASE_REF#*/}
|
23
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
name: Test
|
2
|
+
|
3
|
+
on: [push]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
strategy:
|
8
|
+
fail-fast: false
|
9
|
+
matrix:
|
10
|
+
ruby-version:
|
11
|
+
- 2.6
|
12
|
+
- 2.7
|
13
|
+
- 3.0
|
14
|
+
gemfile:
|
15
|
+
- gemfiles/Gemfile.rails52
|
16
|
+
- gemfiles/Gemfile.rails60
|
17
|
+
|
18
|
+
name: Ruby ${{ matrix.ruby-version }} / Bundle ${{ matrix.gemfile }}
|
19
|
+
|
20
|
+
runs-on: ubuntu-latest
|
21
|
+
|
22
|
+
env:
|
23
|
+
BUNDLE_GEMFILE: ${{ matrix.gemfile }}
|
24
|
+
|
25
|
+
steps:
|
26
|
+
- uses: actions/checkout@v2
|
27
|
+
- name: Set up Ruby
|
28
|
+
uses: ruby/setup-ruby@v1
|
29
|
+
with:
|
30
|
+
ruby-version: ${{ matrix.ruby-version }}
|
31
|
+
- name: Install dependencies
|
32
|
+
run: bundle install
|
33
|
+
- name: Run tests
|
34
|
+
run: bundle exec rake
|
35
|
+
|
36
|
+
# A utility job upon which Branch Protection can depend,
|
37
|
+
# thus remaining agnostic of the matrix.
|
38
|
+
test_matrix:
|
39
|
+
if: ${{ always() }}
|
40
|
+
runs-on: ubuntu-latest
|
41
|
+
name: Matrix
|
42
|
+
needs: test
|
43
|
+
steps:
|
44
|
+
- name: Check build matrix status
|
45
|
+
if: ${{ needs.test.result != 'success' }}
|
46
|
+
run: exit 1
|
47
|
+
|
48
|
+
notify:
|
49
|
+
# Run only on master, but regardless of whether tests pass:
|
50
|
+
if: ${{ always() && github.ref == 'refs/heads/master' }}
|
51
|
+
|
52
|
+
needs: test_matrix
|
53
|
+
|
54
|
+
runs-on: ubuntu-latest
|
55
|
+
|
56
|
+
steps:
|
57
|
+
- uses: 8398a7/action-slack@v3
|
58
|
+
with:
|
59
|
+
status: custom
|
60
|
+
fields: workflow,commit,author
|
61
|
+
custom_payload: |
|
62
|
+
{
|
63
|
+
channel: 'C7FQWGDHP',
|
64
|
+
username: 'CI – ' + '${{ github.repository }}'.split('/')[1],
|
65
|
+
icon_emoji: ':hammer_and_wrench:',
|
66
|
+
attachments: [{
|
67
|
+
color: '${{ needs.test_matrix.result }}' === 'success' ? 'good' : '${{ needs.test_matrix.result }}' === 'failure' ? 'danger' : 'warning',
|
68
|
+
text: `${process.env.AS_WORKFLOW} against \`${{ github.ref }}\` (${process.env.AS_COMMIT}) for ${{ github.actor }} resulted in *${{ needs.test_matrix.result }}*.`
|
69
|
+
}]
|
70
|
+
}
|
71
|
+
env:
|
72
|
+
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
## [Unreleased]
|
2
|
-
*no
|
2
|
+
* no relevant changes
|
3
|
+
|
4
|
+
## 10.1 / 2021-03-08
|
5
|
+
* Allow optional `last_data_column` in NdrImport::Table mappings (#61)
|
6
|
+
|
7
|
+
## 10.0 / 2021-02-22
|
8
|
+
### Changed
|
9
|
+
* By default, escape any control characters found in XML (#60)
|
10
|
+
|
11
|
+
## 9.1.0 / 2021-02-01
|
12
|
+
### Added
|
13
|
+
* `CSVLibrary` is now deprecated.
|
14
|
+
* Handle xlsm files
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
* Fix `CSVLibrary.foreach` on Ruby 3.0
|
18
|
+
* Updated jekyll bundle
|
19
|
+
|
20
|
+
## 9.0.3 / 2021-01-04
|
21
|
+
### Fixed
|
22
|
+
* Address issue importing multiple files against the same table (#54)
|
23
|
+
|
24
|
+
### Changed
|
25
|
+
* ensure keyword args are properly splatted for ruby 2.7
|
26
|
+
|
27
|
+
### Added
|
28
|
+
* Ruby 2.7 to travis matrix
|
29
|
+
|
30
|
+
## 9.0.2 / 2020-08-14
|
31
|
+
### Changed
|
32
|
+
* Configure Nokogiri with HUGE for large xml files
|
3
33
|
|
4
34
|
## 9.0.1 / 2020-03-26
|
5
35
|
### Fixed
|
data/README.md
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
# NdrImport [![Build Status](https://
|
2
|
-
|
1
|
+
# NdrImport [![Build Status](https://github.com/publichealthengland/ndr_import/workflows/Test/badge.svg)](https://github.com/publichealthengland/ndr_import/actions?query=workflow%3Atest) [![Gem Version](https://badge.fury.io/rb/ndr_import.svg)](https://rubygems.org/gems/ndr_import) [![Documentation](https://img.shields.io/badge/ndr_import-docs-blue.svg)](https://www.rubydoc.info/gems/ndr_import)
|
3
2
|
This is the Public Health England (PHE) National Disease Registers (NDR) Import ETL ruby gem, providing:
|
4
3
|
|
5
4
|
1. file import handlers for *extracting* data from delimited files (csv, pipe, tab, thorn), .xls(x) spreadsheets, .doc(x) word documents, PDF, PDF AcroForms, XML, 7-Zip and Zip files.
|
data/code_safety.yml
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
---
|
2
2
|
file safety:
|
3
|
+
".github/CODEOWNERS":
|
4
|
+
comments:
|
5
|
+
reviewed_by: ollietulloch
|
6
|
+
safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
|
7
|
+
".github/workflows/lint.yml":
|
8
|
+
comments:
|
9
|
+
reviewed_by: ollietulloch
|
10
|
+
safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
|
11
|
+
".github/workflows/test.yml":
|
12
|
+
comments:
|
13
|
+
reviewed_by: ollietulloch
|
14
|
+
safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
|
3
15
|
".gitignore":
|
4
16
|
comments: whole file re-reviewed
|
5
17
|
reviewed_by: josh.pencheon
|
@@ -12,14 +24,10 @@ file safety:
|
|
12
24
|
comments:
|
13
25
|
reviewed_by: josh.pencheon
|
14
26
|
safe_revision: b09e268ff9c8349b914aa1b7ba888e1d39f97e4a
|
15
|
-
".travis.yml":
|
16
|
-
comments:
|
17
|
-
reviewed_by: josh.pencheon
|
18
|
-
safe_revision: d3d9a987befeecb122a448d8d06e66d74da13fb5
|
19
27
|
CHANGELOG.md:
|
20
28
|
comments:
|
21
|
-
reviewed_by:
|
22
|
-
safe_revision:
|
29
|
+
reviewed_by: ollietulloch
|
30
|
+
safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
|
23
31
|
CODE_OF_CONDUCT.md:
|
24
32
|
comments:
|
25
33
|
reviewed_by: timgentry
|
@@ -38,8 +46,8 @@ file safety:
|
|
38
46
|
safe_revision: 5d185a0aeba6a9cd2ff5e59efadcaeec9be45d8b
|
39
47
|
README.md:
|
40
48
|
comments:
|
41
|
-
reviewed_by:
|
42
|
-
safe_revision:
|
49
|
+
reviewed_by: ollietulloch
|
50
|
+
safe_revision: b64ff21375dcde2b8fefe622ee9861f0fea21487
|
43
51
|
Rakefile:
|
44
52
|
comments:
|
45
53
|
reviewed_by: josh.pencheon
|
@@ -58,8 +66,8 @@ file safety:
|
|
58
66
|
safe_revision: 02aaf91b116c510a7c16f2b6f2389736b2742f49
|
59
67
|
docs/Gemfile.lock:
|
60
68
|
comments:
|
61
|
-
reviewed_by:
|
62
|
-
safe_revision:
|
69
|
+
reviewed_by: ollietulloch
|
70
|
+
safe_revision: ea0149c7739676463a252ffd9fbe4af238762b2b
|
63
71
|
docs/_config.yml:
|
64
72
|
comments:
|
65
73
|
reviewed_by: josh.pencheon
|
@@ -146,8 +154,8 @@ file safety:
|
|
146
154
|
safe_revision: 24d6449fd0612552f132dfbf4cada2ae28d0469e
|
147
155
|
lib/ndr_import/csv_library.rb:
|
148
156
|
comments:
|
149
|
-
reviewed_by:
|
150
|
-
safe_revision:
|
157
|
+
reviewed_by: ollietulloch
|
158
|
+
safe_revision: 6b8668967dbd42d7893a0fa5f0aa1ec1c11227e1
|
151
159
|
lib/ndr_import/file/acro_form.rb:
|
152
160
|
comments:
|
153
161
|
reviewed_by: josh.pencheon
|
@@ -170,8 +178,8 @@ file safety:
|
|
170
178
|
safe_revision: 897f8b648d633368cf2001d17ab89c06a12d445b
|
171
179
|
lib/ndr_import/file/excel.rb:
|
172
180
|
comments:
|
173
|
-
reviewed_by:
|
174
|
-
safe_revision:
|
181
|
+
reviewed_by: ollietulloch
|
182
|
+
safe_revision: 37482c79448bea80033f6f69d97584df330c9861
|
175
183
|
lib/ndr_import/file/office_file_helper.rb:
|
176
184
|
comments:
|
177
185
|
reviewed_by: josh.pencheon
|
@@ -214,8 +222,8 @@ file safety:
|
|
214
222
|
safe_revision: dfc958d44b6c58355445fa395db08a62213ee709
|
215
223
|
lib/ndr_import/helpers/file/delimited.rb:
|
216
224
|
comments:
|
217
|
-
reviewed_by:
|
218
|
-
safe_revision:
|
225
|
+
reviewed_by: ollietulloch
|
226
|
+
safe_revision: 4a5cc1d362c632fc1f9242c69982fbce33557e17
|
219
227
|
lib/ndr_import/helpers/file/excel.rb:
|
220
228
|
comments:
|
221
229
|
reviewed_by: joshpencheon
|
@@ -230,8 +238,8 @@ file safety:
|
|
230
238
|
safe_revision: 45da71ebd3acbc0fe53755bcd75483ba17cb6924
|
231
239
|
lib/ndr_import/helpers/file/xml.rb:
|
232
240
|
comments:
|
233
|
-
reviewed_by:
|
234
|
-
safe_revision:
|
241
|
+
reviewed_by: joshpencheon
|
242
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
235
243
|
lib/ndr_import/helpers/file/xml_streaming.rb:
|
236
244
|
comments: uses SafePath and Shellwords when accessing filesystem, or making system
|
237
245
|
calls
|
@@ -271,8 +279,8 @@ file safety:
|
|
271
279
|
safe_revision: bb44ade56a2151706eede2c31142440ccf49e6f6
|
272
280
|
lib/ndr_import/non_tabular/table.rb:
|
273
281
|
comments:
|
274
|
-
reviewed_by:
|
275
|
-
safe_revision:
|
282
|
+
reviewed_by: ollietulloch
|
283
|
+
safe_revision: f9df064adcfd38f09d83ad8c5496c84188faed98
|
276
284
|
lib/ndr_import/non_tabular_file_helper.rb:
|
277
285
|
comments:
|
278
286
|
reviewed_by: josh.pencheon
|
@@ -287,20 +295,20 @@ file safety:
|
|
287
295
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
288
296
|
lib/ndr_import/table.rb:
|
289
297
|
comments: uses File.basename
|
290
|
-
reviewed_by:
|
291
|
-
safe_revision:
|
298
|
+
reviewed_by: ollietulloch
|
299
|
+
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
292
300
|
lib/ndr_import/universal_importer_helper.rb:
|
293
301
|
comments:
|
294
|
-
reviewed_by:
|
295
|
-
safe_revision:
|
302
|
+
reviewed_by: ollietulloch
|
303
|
+
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
296
304
|
lib/ndr_import/unmapped_data_error.rb:
|
297
305
|
comments:
|
298
306
|
reviewed_by: josh.pencheon
|
299
307
|
safe_revision: 5cd2cd0b3a1e254d30d4acc28c6731825a1f84f5
|
300
308
|
lib/ndr_import/version.rb:
|
301
309
|
comments: another check?
|
302
|
-
reviewed_by:
|
303
|
-
safe_revision:
|
310
|
+
reviewed_by: ollietulloch
|
311
|
+
safe_revision: 2d093cc57a699b527a7d0159e77b91f4409a6e0b
|
304
312
|
lib/ndr_import/xml/table.rb:
|
305
313
|
comments:
|
306
314
|
reviewed_by: josh.pencheon
|
@@ -309,6 +317,10 @@ file safety:
|
|
309
317
|
comments:
|
310
318
|
reviewed_by: josh.pencheon
|
311
319
|
safe_revision: 95e6ee9997d06471fe6f2f169c3c701471086371
|
320
|
+
test/csv_library_test.rb:
|
321
|
+
comments:
|
322
|
+
reviewed_by: ollietulloch
|
323
|
+
safe_revision: 6b8668967dbd42d7893a0fa5f0aa1ec1c11227e1
|
312
324
|
test/file/acro_form_test.rb:
|
313
325
|
comments:
|
314
326
|
reviewed_by: josh.pencheon
|
@@ -327,16 +339,16 @@ file safety:
|
|
327
339
|
safe_revision: a69d4a57ddcf13cdc13c27bd2eb91a395fa7ea36
|
328
340
|
test/file/excel_test.rb:
|
329
341
|
comments:
|
330
|
-
reviewed_by:
|
331
|
-
safe_revision:
|
342
|
+
reviewed_by: ollietulloch
|
343
|
+
safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
|
332
344
|
test/file/pdf_test.rb:
|
333
345
|
comments:
|
334
346
|
reviewed_by: josh.pencheon
|
335
347
|
safe_revision: cb24ed3ea8116730d07f74546cd6fed0738b171d
|
336
348
|
test/file/registry_test.rb:
|
337
349
|
comments:
|
338
|
-
reviewed_by:
|
339
|
-
safe_revision:
|
350
|
+
reviewed_by: ollietulloch
|
351
|
+
safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
|
340
352
|
test/file/seven_zip_test.rb:
|
341
353
|
comments:
|
342
354
|
reviewed_by: josh.pencheon
|
@@ -383,8 +395,8 @@ file safety:
|
|
383
395
|
safe_revision: ae75fb49baf028ac8ce08e4bedcd3625ff3ff0cd
|
384
396
|
test/helpers/file/xml_test.rb:
|
385
397
|
comments:
|
386
|
-
reviewed_by:
|
387
|
-
safe_revision:
|
398
|
+
reviewed_by: joshpencheon
|
399
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
388
400
|
test/helpers/file/zip_test.rb:
|
389
401
|
comments:
|
390
402
|
reviewed_by: josh.pencheon
|
@@ -495,8 +507,8 @@ file safety:
|
|
495
507
|
safe_revision: 71979e0a602ca5a0ce415c194f10add9959f0116
|
496
508
|
test/resources/malformed.xml:
|
497
509
|
comments:
|
498
|
-
reviewed_by:
|
499
|
-
safe_revision:
|
510
|
+
reviewed_by: joshpencheon
|
511
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
500
512
|
test/resources/malformed_pipe.csv:
|
501
513
|
comments:
|
502
514
|
reviewed_by: josh.pencheon
|
@@ -553,6 +565,10 @@ file safety:
|
|
553
565
|
comments:
|
554
566
|
reviewed_by: timgentry
|
555
567
|
safe_revision: 8c30f89f0562ab120769c166d4e93ff839c055f7
|
568
|
+
test/resources/sample_xlsm.xlsm:
|
569
|
+
comments:
|
570
|
+
reviewed_by: ollietulloch
|
571
|
+
safe_revision: 85a080deaa93e4220ad1bf566f29cbdac9b31c0f
|
556
572
|
test/resources/sample_xlsx.xlsx:
|
557
573
|
comments:
|
558
574
|
reviewed_by: timgentry
|
@@ -569,6 +585,10 @@ file safety:
|
|
569
585
|
comments:
|
570
586
|
reviewed_by: timgentry
|
571
587
|
safe_revision: 31fb1935f4578729d8786eea41cf0ce0a19be1cd
|
588
|
+
test/resources/two_files_single_table_mapping.zip:
|
589
|
+
comments:
|
590
|
+
reviewed_by: ollietulloch
|
591
|
+
safe_revision: 830de0f8cb139c5f61525652b424423935cfc7ac
|
572
592
|
test/resources/txt_file_xls_extension.xls:
|
573
593
|
comments:
|
574
594
|
reviewed_by: timgentry
|
@@ -601,6 +621,10 @@ file safety:
|
|
601
621
|
comments:
|
602
622
|
reviewed_by: timgentry
|
603
623
|
safe_revision: f755c6960182f7dd460c18866cccfdf09178e860
|
624
|
+
test/resources/with-control-chars.xml:
|
625
|
+
comments:
|
626
|
+
reviewed_by: joshpencheon
|
627
|
+
safe_revision: 3947f13e0cbd17f449eba292ad343eeb82116fe9
|
604
628
|
test/resources/xlsx_file_xls_extension.xls:
|
605
629
|
comments:
|
606
630
|
reviewed_by: timgentry
|
@@ -611,16 +635,16 @@ file safety:
|
|
611
635
|
safe_revision: 3c7f827d17aacbf7b811eea67e27553f3b039070
|
612
636
|
test/table_test.rb:
|
613
637
|
comments:
|
614
|
-
reviewed_by:
|
615
|
-
safe_revision:
|
638
|
+
reviewed_by: ollietulloch
|
639
|
+
safe_revision: 3cf7473181f7f835b3dfe7822f6833d751805eaf
|
616
640
|
test/test_helper.rb:
|
617
641
|
comments:
|
618
642
|
reviewed_by: josh.pencheon
|
619
643
|
safe_revision: 93ccee82fc2165d1ca2d9b03d146ae03e769ea96
|
620
644
|
test/universal_importer_helper_test.rb:
|
621
645
|
comments:
|
622
|
-
reviewed_by:
|
623
|
-
safe_revision:
|
646
|
+
reviewed_by: ollietulloch
|
647
|
+
safe_revision: 85869d99ae93252b7f3ef2d0a4db817c88d35c9e
|
624
648
|
test/xml/table_test.rb:
|
625
649
|
comments:
|
626
650
|
reviewed_by: josh.pencheon
|
@@ -1,38 +1,60 @@
|
|
1
1
|
# This file allows us to choose the CSV library we want to use.
|
2
2
|
|
3
3
|
require 'csv'
|
4
|
+
require 'active_support/deprecation'
|
5
|
+
|
4
6
|
# Using relevant core CSV library.
|
5
|
-
CSVLibrary
|
7
|
+
class CSVLibrary < CSV; end
|
6
8
|
|
7
9
|
class << CSVLibrary
|
8
10
|
# Is the library we're using FasterCSV?
|
9
11
|
def fastercsv?
|
12
|
+
deprecate('if you desparately want fastercsv, please use it explicitly')
|
10
13
|
not self.const_defined?(:Reader)
|
11
14
|
end
|
12
15
|
|
13
16
|
# Ensure that we can pass "mode" straight through the underlying IO object
|
17
|
+
#
|
18
|
+
# Note: this could likely be refactored now, as upstream support for something
|
19
|
+
# very similar was added:
|
20
|
+
#
|
21
|
+
# https://github.com/ruby/csv/commit/b4edaf2cf1aa36f5c6264c07514b66739b87ceee
|
22
|
+
#
|
14
23
|
def foreach(path, **options, &block)
|
15
|
-
|
16
|
-
|
24
|
+
deprecate('CSV#foreach exists, with an optional `mode` argument')
|
25
|
+
return to_enum(__method__, path, **options) unless block
|
26
|
+
open(path, options.delete(:mode) || 'r', **options) do |csv|
|
17
27
|
csv.each(&block)
|
18
28
|
end
|
19
29
|
end
|
20
30
|
|
21
31
|
def write_csv_to_string(data)
|
32
|
+
deprecate('write_csv_to_string -> generate')
|
22
33
|
self.generate do |csv|
|
23
34
|
data.each { |line| csv << line }
|
24
35
|
end
|
25
36
|
end
|
26
37
|
|
27
38
|
def write_csv_to_file(data, filepath, mode = 'w')
|
39
|
+
deprecate('write_csv_to_file -> open')
|
28
40
|
self.open(filepath, mode) do |csv|
|
29
41
|
data.each { |line| csv << line }
|
30
42
|
end
|
31
43
|
end
|
32
44
|
|
33
45
|
def read_csv_from_file(filepath)
|
46
|
+
deprecate('read_csv_from_file -> read')
|
34
47
|
self.read(filepath)
|
35
48
|
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def deprecate(additional_message = nil)
|
53
|
+
ActiveSupport::Deprecation.warn(<<~MESSAGE)
|
54
|
+
CSVLibrary is deprecated, and will be removed in a future version of ndr_import.
|
55
|
+
Please use standard functionality provided by Ruby's CSV library (#{additional_message}).
|
56
|
+
MESSAGE
|
57
|
+
end
|
36
58
|
end
|
37
59
|
|
38
60
|
# Forward port CSV::Cell, as it is sometimes
|
@@ -90,14 +90,14 @@ module NdrImport
|
|
90
90
|
case SafeFile.extname(path).downcase
|
91
91
|
when '.xls'
|
92
92
|
Roo::Excel.new(SafeFile.safepath_to_string(path))
|
93
|
-
when '.xlsx'
|
93
|
+
when '.xlsm', '.xlsx'
|
94
94
|
if @options['file_password']
|
95
95
|
Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password'])))
|
96
96
|
else
|
97
97
|
Roo::Excelx.new(SafeFile.safepath_to_string(path))
|
98
98
|
end
|
99
99
|
else
|
100
|
-
|
100
|
+
raise "Received file path with unexpected extension #{SafeFile.extname(path)}"
|
101
101
|
end
|
102
102
|
rescue Ole::Storage::FormatError => e
|
103
103
|
# TODO: Do we need to remove the new_file after using it?
|
@@ -105,16 +105,14 @@ module NdrImport
|
|
105
105
|
# try to load the .xls file as an .xlsx file, useful for sources like USOM
|
106
106
|
# roo checks file extensions in file_type_check (GenericSpreadsheet),
|
107
107
|
# so we create a duplicate file in xlsx extension
|
108
|
-
|
109
|
-
new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
|
110
|
-
new_file_path = SafeFile.dirname(path).join(new_file_name)
|
111
|
-
copy_file(path, new_file_path)
|
108
|
+
raise e.message unless /(.*)\.xls$/.match(path)
|
112
109
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
110
|
+
new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
|
111
|
+
new_file_path = SafeFile.dirname(path).join(new_file_name)
|
112
|
+
copy_file(path, new_file_path)
|
113
|
+
|
114
|
+
load_workbook(new_file_path)
|
115
|
+
rescue RuntimeError, ::Zip::Error => e
|
118
116
|
raise ["Unable to read the file '#{path}'", e.message].join('; ')
|
119
117
|
end
|
120
118
|
|
@@ -133,6 +131,6 @@ module NdrImport
|
|
133
131
|
end
|
134
132
|
end
|
135
133
|
|
136
|
-
Registry.register(Excel, 'xls', 'xlsx')
|
134
|
+
Registry.register(Excel, 'xls', 'xlsm', 'xlsx')
|
137
135
|
end
|
138
136
|
end
|
@@ -32,11 +32,11 @@ module NdrImport
|
|
32
32
|
return enum_for(:delimited_rows, path, col_sep, liberal) unless block_given?
|
33
33
|
|
34
34
|
safe_path = SafeFile.safepath_to_string(path)
|
35
|
-
|
35
|
+
options = determine_encodings!(safe_path, col_sep, liberal)
|
36
36
|
|
37
|
-
# By now, we know `
|
37
|
+
# By now, we know `options` should let us read the whole
|
38
38
|
# file successfully; if there are problems, we should crash.
|
39
|
-
|
39
|
+
CSV.foreach(safe_path, options.delete(:mode), **options) do |line|
|
40
40
|
yield line.map(&:to_s)
|
41
41
|
end
|
42
42
|
end
|
@@ -46,7 +46,7 @@ module NdrImport
|
|
46
46
|
# Derive the source encoding by trying all supported encodings.
|
47
47
|
# Returns first set of working options, or raises if none could be found.
|
48
48
|
def determine_encodings!(safe_path, col_sep, liberal)
|
49
|
-
# delimiter encoding => #
|
49
|
+
# delimiter encoding => # CSV encoding string
|
50
50
|
supported_encodings = {
|
51
51
|
'UTF-8' => 'r:bom|utf-8',
|
52
52
|
'Windows-1252' => 'r:windows-1252:utf-8'
|
@@ -67,14 +67,13 @@ module NdrImport
|
|
67
67
|
begin
|
68
68
|
options = {
|
69
69
|
col_sep: (col_sep || ',').force_encoding(delimiter_encoding),
|
70
|
-
liberal_parsing: liberal
|
71
|
-
mode: access_mode
|
70
|
+
liberal_parsing: liberal
|
72
71
|
}
|
73
72
|
|
74
73
|
row_num = 0
|
75
74
|
# Iterate through the file; if we reach the end, this encoding worked:
|
76
|
-
|
77
|
-
return options
|
75
|
+
CSV.foreach(safe_path, access_mode, **options) { |_line| row_num += 1 }
|
76
|
+
return options.merge(mode: access_mode)
|
78
77
|
rescue ArgumentError => e
|
79
78
|
next if e.message =~ /invalid byte sequence/ # This encoding didn't work
|
80
79
|
raise(e)
|
@@ -10,15 +10,20 @@ module NdrImport
|
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
# By default, escapes any control characters found in the XML
|
14
|
+
# - their use is forbidden in XML 1.0, and highly discouraged
|
15
|
+
# in XML 1.1; any found are most likely to be erroneous.
|
16
|
+
def read_xml_file(path, preserve_control_chars: false)
|
17
|
+
file_data = ensure_utf8!(SafeFile.read(path))
|
18
|
+
escape_xml_control_chars!(file_data) unless preserve_control_chars
|
15
19
|
|
16
20
|
require 'nokogiri'
|
17
21
|
|
18
|
-
Nokogiri::XML(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
+
doc = Nokogiri::XML(file_data, &:huge)
|
23
|
+
doc.encoding = 'UTF-8'
|
24
|
+
emulate_strict_mode_fatal_check!(doc)
|
25
|
+
|
26
|
+
doc
|
22
27
|
end
|
23
28
|
|
24
29
|
# Nokogiri can give a `STRICT` parse option to libxml, but our friendly
|
@@ -37,11 +42,19 @@ module NdrImport
|
|
37
42
|
end
|
38
43
|
|
39
44
|
return unless fatal_errors.any?
|
45
|
+
|
40
46
|
raise Nokogiri::XML::SyntaxError, <<~MSG
|
41
47
|
The file had #{fatal_errors.length} fatal error(s)!"
|
42
48
|
#{fatal_errors.join("\n")}
|
43
49
|
MSG
|
44
50
|
end
|
51
|
+
|
52
|
+
# In place, escape out any control chars that would cause
|
53
|
+
# libxml to crash. Very few are allowable in XML 1.0, and
|
54
|
+
# remain heavily discouraged in XML 1.1.
|
55
|
+
def escape_xml_control_chars!(data)
|
56
|
+
escape_control_chars!(data)
|
57
|
+
end
|
45
58
|
end
|
46
59
|
end
|
47
60
|
end
|
@@ -16,7 +16,7 @@ module NdrImport
|
|
16
16
|
|
17
17
|
include UTF8Encoding
|
18
18
|
|
19
|
-
TABULAR_ONLY_OPTIONS = %w[delimiter liberal_parsing tablename_pattern
|
19
|
+
TABULAR_ONLY_OPTIONS = %w[delimiter last_data_column liberal_parsing tablename_pattern
|
20
20
|
header_lines footer_lines xml_record_xpath].freeze
|
21
21
|
|
22
22
|
NON_TABULAR_OPTIONS = %w[capture_end_line capture_start_line start_line_pattern
|
data/lib/ndr_import/table.rb
CHANGED
@@ -10,8 +10,9 @@ module NdrImport
|
|
10
10
|
include NdrImport::Mapper
|
11
11
|
|
12
12
|
def self.all_valid_options
|
13
|
-
%w[canonical_name delimiter liberal_parsing filename_pattern file_password
|
14
|
-
header_lines footer_lines format klass columns xml_record_xpath
|
13
|
+
%w[canonical_name delimiter liberal_parsing filename_pattern file_password last_data_column
|
14
|
+
tablename_pattern header_lines footer_lines format klass columns xml_record_xpath
|
15
|
+
row_identifier]
|
15
16
|
end
|
16
17
|
|
17
18
|
def all_valid_options
|
@@ -50,8 +51,9 @@ module NdrImport
|
|
50
51
|
@header_best_guess = nil
|
51
52
|
@notifier.try(:started)
|
52
53
|
|
54
|
+
last_col = last_column_to_transform
|
53
55
|
skip_footer_lines(lines, footer_lines).each do |line|
|
54
|
-
process_line(line, &block)
|
56
|
+
line.is_a?(Array) ? process_line(line[0..last_col], &block) : process_line(line, &block)
|
55
57
|
end
|
56
58
|
|
57
59
|
@notifier.try(:finished)
|
@@ -226,5 +228,26 @@ module NdrImport
|
|
226
228
|
def column_names(column_mappings)
|
227
229
|
column_mappings.map { |c| (c['column'] || c['standard_mapping']).downcase }
|
228
230
|
end
|
231
|
+
|
232
|
+
# If specified in the mapping, stop transforming data at a given index (column)
|
233
|
+
def last_column_to_transform
|
234
|
+
return -1 if last_data_column.nil?
|
235
|
+
return last_data_column - 1 if last_data_column.is_a?(Integer)
|
236
|
+
|
237
|
+
error = "Unknown 'last_data_column' format: #{last_data_column} " \
|
238
|
+
"(#{last_data_column.class})"
|
239
|
+
raise error unless last_data_column.is_a?(String) && last_data_column =~ /\A[A-Z]+\z/i
|
240
|
+
|
241
|
+
# If it's an excel column label (eg 'K', 'AF', 'DDE'), convert it to an index
|
242
|
+
index_from_column_label
|
243
|
+
end
|
244
|
+
|
245
|
+
def index_from_column_label
|
246
|
+
alphabet_index_hash = ('A'..'Z').map.with_index.to_h
|
247
|
+
index = last_data_column.upcase.chars.inject(0) do |char_index, char|
|
248
|
+
(char_index * 26) + (alphabet_index_hash[char] + 1)
|
249
|
+
end
|
250
|
+
index - 1
|
251
|
+
end
|
229
252
|
end # class Table
|
230
253
|
end
|
@@ -7,11 +7,30 @@ module NdrImport
|
|
7
7
|
# complexity of enumerating over files and tables (which should be universally useful).
|
8
8
|
# It is assumed that the host module/class defines `unzip_path`.
|
9
9
|
module UniversalImporterHelper
|
10
|
+
# Helper class to allow multiple source enumerators to contribute to one overall table.
|
11
|
+
class TableEnumProxy
|
12
|
+
include Enumerable
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@table_enums = []
|
16
|
+
end
|
17
|
+
|
18
|
+
def add_table_enum(table_enum)
|
19
|
+
@table_enums << table_enum
|
20
|
+
end
|
21
|
+
|
22
|
+
def each(&block)
|
23
|
+
return enum_for(:each) unless block
|
24
|
+
|
25
|
+
@table_enums.each { |table_enum| table_enum.each(&block) }
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
10
29
|
def table_enumerators(filename)
|
11
|
-
table_enumerators = {}
|
30
|
+
table_enumerators = Hash.new { |hash, key| hash[key] = TableEnumProxy.new }
|
12
31
|
|
13
32
|
extract(filename).each do |table, rows|
|
14
|
-
table_enumerators[table.canonical_name]
|
33
|
+
table_enumerators[table.canonical_name].add_table_enum table.transform(rows)
|
15
34
|
end
|
16
35
|
|
17
36
|
table_enumerators
|
@@ -29,9 +48,7 @@ module NdrImport
|
|
29
48
|
def extract(source_file, &block)
|
30
49
|
return enum_for(:extract, source_file) unless block
|
31
50
|
|
32
|
-
|
33
|
-
'unzip_path' => unzip_path)
|
34
|
-
files.each do |filename|
|
51
|
+
NdrImport::File::Registry.files(source_file, 'unzip_path' => unzip_path).each do |filename|
|
35
52
|
# now at the individual file level, can we find the table mapping?
|
36
53
|
table_mapping = get_table_mapping(filename, nil)
|
37
54
|
|
data/lib/ndr_import/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: '10.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|
@@ -365,6 +365,9 @@ executables:
|
|
365
365
|
extensions: []
|
366
366
|
extra_rdoc_files: []
|
367
367
|
files:
|
368
|
+
- ".github/CODEOWNERS"
|
369
|
+
- ".github/workflows/lint.yml"
|
370
|
+
- ".github/workflows/test.yml"
|
368
371
|
- ".gitignore"
|
369
372
|
- ".hound.yml"
|
370
373
|
- ".rubocop.yml"
|