ndr_import 6.3.0 → 6.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ae1ff625cdd7352f4b3306f9a49905b2563557b487762611dfd0391735177ee
4
- data.tar.gz: a7bdc413db721c0f813b9fd7ef862beff3cf6a9a76f042eef15d141b2526f5a7
3
+ metadata.gz: 5f9be51a2c9d3ec3312fb4f2be9435f5fdddecdd76f35a71cdd321d40f4a39d5
4
+ data.tar.gz: 9ca49c658395de4d43a268219fde8abf9ef2b38d1494945d0241de4ebcc6fed5
5
5
  SHA512:
6
- metadata.gz: 3d572905a49329c295a9ab701de99a4f82a9165a399b05ca6f80e48218e2c603fddca14768d4aad2e9139ac07cdbaa6c5ba1f1851527cc50f7f6f4b45ed2392e
7
- data.tar.gz: 3414e9efa2b16b60e9d532075fca1df095bdcab20516738243fc9ed9ac5b30540b3ad1f9139feaedfd3f5c3182c049e2deb5f5cd878a67360bdefaeffd9c6452
6
+ metadata.gz: b5f4591fcd249ef471379f33553f7b22cc392058a1adf2b1eac0fb4ecf9597d02edac84f812ddb80eecc2a170cf08f0b6c32bc2df3043fa4b16bf77e5fd3c7d7
7
+ data.tar.gz: 60188e2b35683b23a2f8142aace35b35e5e09ae49a537a1e8829262985ec3f7858a5bb9328eb37344adebe208d85e5aafa53c75d211d27eaf57f960543ca6a5e
data/code_safety.yml CHANGED
@@ -141,9 +141,9 @@ file safety:
141
141
  reviewed_by: timgentry
142
142
  safe_revision: 6c6f204fab2f4232d81cb76aa523c26b0c490ae7
143
143
  lib/ndr_import/mapper.rb:
144
- comments:
144
+ comments: Writes to a Tempfile, but cleans up. Ruby will respect TMP_DIR
145
145
  reviewed_by: josh.pencheon
146
- safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
146
+ safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
147
147
  lib/ndr_import/mapping_error.rb:
148
148
  comments:
149
149
  reviewed_by: timgentry
@@ -187,7 +187,7 @@ file safety:
187
187
  lib/ndr_import/version.rb:
188
188
  comments: another check?
189
189
  reviewed_by: josh.pencheon
190
- safe_revision: 0b1ab7c810d0fa46d153238d69627c07f56d1efa
190
+ safe_revision: 32f813db3eaf29fd24f6bba6e3b2a1ba137d5d1d
191
191
  lib/ndr_import/xml/table.rb:
192
192
  comments:
193
193
  reviewed_by: josh.pencheon
@@ -267,7 +267,7 @@ file safety:
267
267
  test/mapper_test.rb:
268
268
  comments: exposes Mapper internals to test them
269
269
  reviewed_by: josh.pencheon
270
- safe_revision: 4ab72f84201c2d5f0147b7dfd041f488f6ff0422
270
+ safe_revision: a41595349baf4428e63380c103b524ba2fa8d7df
271
271
  test/non_tabular/mapping_test.rb:
272
272
  comments:
273
273
  reviewed_by: timgentry
@@ -3,6 +3,7 @@ require 'ndr_support/string/conversions'
3
3
  require 'ndr_import/standard_mappings'
4
4
  require 'base64'
5
5
  require 'msworddoc-extractor'
6
+ require 'docx'
6
7
 
7
8
  # This module provides helper logic for mapping unified sources for import into the system
8
9
  module NdrImport::Mapper
@@ -242,11 +243,23 @@ module NdrImport::Mapper
242
243
  end
243
244
  end
244
245
 
245
- # Given an IO stream representing a .doc word document,
246
- # this method will extract the text for the document in the same way
247
- # as NdrImport::Helpers::File::Word#read_word_file
246
+ # Given an IO stream representing a .doc or .docx word document,
247
+ # this method will extract the text from the document in the same way
248
+ # as NdrImport::File::Word or NdrImport::File::Docx respectively
248
249
  def read_word_stream(stream)
249
250
  # whole_contents adds "\n" to end of stream, we remove it
250
251
  MSWordDoc::Extractor.load(stream).whole_contents.sub(/\n\z/, '')
252
+ rescue Ole::Storage::FormatError
253
+ stream.rewind
254
+ read_docx(stream)
255
+ end
256
+
257
+ def read_docx(stream)
258
+ Tempfile.create(encoding: 'ascii-8bit') do |tempfile|
259
+ tempfile.write(stream.read)
260
+
261
+ docx = ::Docx::Document.open(tempfile.path)
262
+ docx.paragraphs.map(&:to_s).join("\n")
263
+ end
251
264
  end
252
265
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  # This stores the current version of the NdrImport gem
3
3
  module NdrImport
4
- VERSION = '6.3.0'.freeze
4
+ VERSION = '6.4.0'.freeze
5
5
  end
data/test/mapper_test.rb CHANGED
@@ -622,6 +622,16 @@ class MapperTest < ActiveSupport::TestCase
622
622
  assert_equal 'Hello world, this is a word document', line_hash[:rawtext]['base64']
623
623
  end
624
624
 
625
+ test 'should decode base64 encoded docx document' do
626
+ test_file = @permanent_test_files.join('hello_world.docx')
627
+ encoded_content = Base64.encode64(File.binread(test_file))
628
+ line_hash = TestMapper.new.mapped_line([encoded_content], base64_mapping)
629
+ expected_content = "Hello world, this is a modern word document\n" \
630
+ "With more than one line of text\nThree in fact"
631
+
632
+ assert_equal expected_content, line_hash[:rawtext]['base64']
633
+ end
634
+
625
635
  test 'should decode word.doc' do
626
636
  test_file = @permanent_test_files.join('hello_world.doc')
627
637
  file_content = File.binread(test_file)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ndr_import
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.3.0
4
+ version: 6.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - NCRS Development Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-12 00:00:00.000000000 Z
11
+ date: 2018-10-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport