ndr_import 6.3.0 → 6.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/code_safety.yml +4 -4
- data/lib/ndr_import/mapper.rb +16 -3
- data/lib/ndr_import/version.rb +1 -1
- data/test/mapper_test.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f9be51a2c9d3ec3312fb4f2be9435f5fdddecdd76f35a71cdd321d40f4a39d5
|
4
|
+
data.tar.gz: 9ca49c658395de4d43a268219fde8abf9ef2b38d1494945d0241de4ebcc6fed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5f4591fcd249ef471379f33553f7b22cc392058a1adf2b1eac0fb4ecf9597d02edac84f812ddb80eecc2a170cf08f0b6c32bc2df3043fa4b16bf77e5fd3c7d7
|
7
|
+
data.tar.gz: 60188e2b35683b23a2f8142aace35b35e5e09ae49a537a1e8829262985ec3f7858a5bb9328eb37344adebe208d85e5aafa53c75d211d27eaf57f960543ca6a5e
|
data/code_safety.yml
CHANGED
@@ -141,9 +141,9 @@ file safety:
|
|
141
141
|
reviewed_by: timgentry
|
142
142
|
safe_revision: 6c6f204fab2f4232d81cb76aa523c26b0c490ae7
|
143
143
|
lib/ndr_import/mapper.rb:
|
144
|
-
comments:
|
144
|
+
comments: Writes to a Tempfile, but cleans up. Ruby will respect TMP_DIR
|
145
145
|
reviewed_by: josh.pencheon
|
146
|
-
safe_revision:
|
146
|
+
safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
|
147
147
|
lib/ndr_import/mapping_error.rb:
|
148
148
|
comments:
|
149
149
|
reviewed_by: timgentry
|
@@ -187,7 +187,7 @@ file safety:
|
|
187
187
|
lib/ndr_import/version.rb:
|
188
188
|
comments: another check?
|
189
189
|
reviewed_by: josh.pencheon
|
190
|
-
safe_revision:
|
190
|
+
safe_revision: 32f813db3eaf29fd24f6bba6e3b2a1ba137d5d1d
|
191
191
|
lib/ndr_import/xml/table.rb:
|
192
192
|
comments:
|
193
193
|
reviewed_by: josh.pencheon
|
@@ -267,7 +267,7 @@ file safety:
|
|
267
267
|
test/mapper_test.rb:
|
268
268
|
comments: exposes Mapper internals to test them
|
269
269
|
reviewed_by: josh.pencheon
|
270
|
-
safe_revision:
|
270
|
+
safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
|
271
271
|
test/non_tabular/mapping_test.rb:
|
272
272
|
comments:
|
273
273
|
reviewed_by: timgentry
|
data/lib/ndr_import/mapper.rb
CHANGED
@@ -3,6 +3,7 @@ require 'ndr_support/string/conversions'
|
|
3
3
|
require 'ndr_import/standard_mappings'
|
4
4
|
require 'base64'
|
5
5
|
require 'msworddoc-extractor'
|
6
|
+
require 'docx'
|
6
7
|
|
7
8
|
# This module provides helper logic for mapping unified sources for import into the system
|
8
9
|
module NdrImport::Mapper
|
@@ -242,11 +243,23 @@ module NdrImport::Mapper
|
|
242
243
|
end
|
243
244
|
end
|
244
245
|
|
245
|
-
# Given an IO stream representing a .doc word document,
|
246
|
-
# this method will extract the text
|
247
|
-
# as NdrImport::
|
246
|
+
# Given an IO stream representing a .doc or .docx word document,
|
247
|
+
# this method will extract the text from the document in the same way
|
248
|
+
# as NdrImport::File::Word or NdrImport::File::Docx respectively
|
248
249
|
def read_word_stream(stream)
|
249
250
|
# whole_contents adds "\n" to end of stream, we remove it
|
250
251
|
MSWordDoc::Extractor.load(stream).whole_contents.sub(/\n\z/, '')
|
252
|
+
rescue Ole::Storage::FormatError
|
253
|
+
stream.rewind
|
254
|
+
read_docx(stream)
|
255
|
+
end
|
256
|
+
|
257
|
+
def read_docx(stream)
|
258
|
+
Tempfile.create(encoding: 'ascii-8bit') do |tempfile|
|
259
|
+
tempfile.write(stream.read)
|
260
|
+
|
261
|
+
docx = ::Docx::Document.open(tempfile.path)
|
262
|
+
docx.paragraphs.map(&:to_s).join("\n")
|
263
|
+
end
|
251
264
|
end
|
252
265
|
end
|
data/lib/ndr_import/version.rb
CHANGED
data/test/mapper_test.rb
CHANGED
@@ -622,6 +622,16 @@ class MapperTest < ActiveSupport::TestCase
|
|
622
622
|
assert_equal 'Hello world, this is a word document', line_hash[:rawtext]['base64']
|
623
623
|
end
|
624
624
|
|
625
|
+
test 'should decode base64 encoded docx document' do
|
626
|
+
test_file = @permanent_test_files.join('hello_world.docx')
|
627
|
+
encoded_content = Base64.encode64(File.binread(test_file))
|
628
|
+
line_hash = TestMapper.new.mapped_line([encoded_content], base64_mapping)
|
629
|
+
expected_content = "Hello world, this is a modern word document\n" \
|
630
|
+
"With more than one line of text\nThree in fact"
|
631
|
+
|
632
|
+
assert_equal expected_content, line_hash[:rawtext]['base64']
|
633
|
+
end
|
634
|
+
|
625
635
|
test 'should decode word.doc' do
|
626
636
|
test_file = @permanent_test_files.join('hello_world.doc')
|
627
637
|
file_content = File.binread(test_file)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ndr_import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.3.0
|
4
|
+
version: 6.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- NCRS Development Team
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|