ndr_import 6.3.0 → 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/code_safety.yml +4 -4
- data/lib/ndr_import/mapper.rb +16 -3
- data/lib/ndr_import/version.rb +1 -1
- data/test/mapper_test.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f9be51a2c9d3ec3312fb4f2be9435f5fdddecdd76f35a71cdd321d40f4a39d5
|
4
|
+
data.tar.gz: 9ca49c658395de4d43a268219fde8abf9ef2b38d1494945d0241de4ebcc6fed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5f4591fcd249ef471379f33553f7b22cc392058a1adf2b1eac0fb4ecf9597d02edac84f812ddb80eecc2a170cf08f0b6c32bc2df3043fa4b16bf77e5fd3c7d7
|
7
|
+
data.tar.gz: 60188e2b35683b23a2f8142aace35b35e5e09ae49a537a1e8829262985ec3f7858a5bb9328eb37344adebe208d85e5aafa53c75d211d27eaf57f960543ca6a5e
|
data/code_safety.yml
CHANGED
@@ -141,9 +141,9 @@ file safety:
|
|
141
141
|
reviewed_by: timgentry
|
142
142
|
safe_revision: 6c6f204fab2f4232d81cb76aa523c26b0c490ae7
|
143
143
|
lib/ndr_import/mapper.rb:
|
144
|
-
comments:
|
144
|
+
comments: Writes to a Tempfile, but cleans up. Ruby will respect TMP_DIR
|
145
145
|
reviewed_by: josh.pencheon
|
146
|
-
safe_revision:
|
146
|
+
safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
|
147
147
|
lib/ndr_import/mapping_error.rb:
|
148
148
|
comments:
|
149
149
|
reviewed_by: timgentry
|
@@ -187,7 +187,7 @@ file safety:
|
|
187
187
|
lib/ndr_import/version.rb:
|
188
188
|
comments: another check?
|
189
189
|
reviewed_by: josh.pencheon
|
190
|
-
safe_revision:
|
190
|
+
safe_revision: 32f813db3eaf29fd24f6bba6e3b2a1ba137d5d1d
|
191
191
|
lib/ndr_import/xml/table.rb:
|
192
192
|
comments:
|
193
193
|
reviewed_by: josh.pencheon
|
@@ -267,7 +267,7 @@ file safety:
|
|
267
267
|
test/mapper_test.rb:
|
268
268
|
comments: exposes Mapper internals to test them
|
269
269
|
reviewed_by: josh.pencheon
|
270
|
-
safe_revision:
|
270
|
+
safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
|
271
271
|
test/non_tabular/mapping_test.rb:
|
272
272
|
comments:
|
273
273
|
reviewed_by: timgentry
|
data/lib/ndr_import/mapper.rb
CHANGED
@@ -3,6 +3,7 @@ require 'ndr_support/string/conversions'
|
|
3
3
|
require 'ndr_import/standard_mappings'
|
4
4
|
require 'base64'
|
5
5
|
require 'msworddoc-extractor'
|
6
|
+
require 'docx'
|
6
7
|
|
7
8
|
# This module provides helper logic for mapping unified sources for import into the system
|
8
9
|
module NdrImport::Mapper
|
@@ -242,11 +243,23 @@ module NdrImport::Mapper
|
|
242
243
|
end
|
243
244
|
end
|
244
245
|
|
245
|
-
# Given an IO stream representing a .doc word document,
|
246
|
-
# this method will extract the text
|
247
|
-
# as NdrImport::
|
246
|
+
# Given an IO stream representing a .doc or .docx word document,
|
247
|
+
# this method will extract the text from the document in the same way
|
248
|
+
# as NdrImport::File::Word or NdrImport::File::Docx respectively
|
248
249
|
def read_word_stream(stream)
|
249
250
|
# whole_contents adds "\n" to end of stream, we remove it
|
250
251
|
MSWordDoc::Extractor.load(stream).whole_contents.sub(/\n\z/, '')
|
252
|
+
rescue Ole::Storage::FormatError
|
253
|
+
stream.rewind
|
254
|
+
read_docx(stream)
|
255
|
+
end
|
256
|
+
|
257
|
+
def read_docx(stream)
|
258
|
+
Tempfile.create(encoding: 'ascii-8bit') do |tempfile|
|
259
|
+
tempfile.write(stream.read)
|
260
|
+
|
261
|
+
docx = ::Docx::Document.open(tempfile.path)
|
262
|
+
docx.paragraphs.map(&:to_s).join("\n")
|
263
|
+
end
|
251
264
|
end
|
252
265
|
end
|
data/lib/ndr_import/version.rb
CHANGED
data/test/mapper_test.rb
CHANGED
@@ -622,6 +622,16 @@ class MapperTest < ActiveSupport::TestCase
|
|
622
622
|
assert_equal 'Hello world, this is a word document', line_hash[:rawtext]['base64']
|
623
623
|
end
|
624
624
|
|
625
|
+
test 'should decode base64 encoded docx document' do
|
626
|
+
test_file = @permanent_test_files.join('hello_world.docx')
|
627
|
+
encoded_content = Base64.encode64(File.binread(test_file))
|
628
|
+
line_hash = TestMapper.new.mapped_line([encoded_content], base64_mapping)
|
629
|
+
expected_content = "Hello world, this is a modern word document\n" \
|
630
|
+
"With more than one line of text\nThree in fact"
|
631
|
+
|
632
|
+
assert_equal expected_content, line_hash[:rawtext]['base64']
|
633
|
+
end
|
634
|
+
|
625
635
|
test 'should decode word.doc' do
|
626
636
|
test_file = @permanent_test_files.join('hello_world.doc')
|
627
637
|
file_content = File.binread(test_file)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.3.0
|
4
|
+
version: 6.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|