act_as_page_extractor 0.6.2 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +7 -4
  4. data/act_as_page_extractor.gemspec +1 -1
  5. data/lib/act_as_page_extractor/modules/extracting.rb +8 -1
  6. data/lib/act_as_page_extractor/modules/interface.rb +1 -1
  7. data/lib/act_as_page_extractor/modules/saving.rb +1 -1
  8. data/lib/act_as_page_extractor/modules/tools.rb +1 -1
  9. data/lib/act_as_page_extractor/version.rb +1 -1
  10. data/lib/act_as_page_extractor.rb +2 -2
  11. data/spec/act_as_page_extractor_spec.rb +13 -13
  12. data/spec/support/models.rb +1 -1
  13. data/test/Oscar_Wilde_The_Happy_Prince_en.doc +0 -0
  14. data/test/Oscar_Wilde_The_Happy_Prince_en.docx +0 -0
  15. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.7z +0 -0
  16. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.rar +0 -0
  17. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.zip +0 -0
  18. data/test/Oscar_Wilde_The_Happy_Prince_en.html +395 -0
  19. data/test/Oscar_Wilde_The_Happy_Prince_en.odt +0 -0
  20. data/test/Oscar_Wilde_The_Happy_Prince_en.pdf +0 -0
  21. data/test/Oscar_Wilde_The_Happy_Prince_en.rtf +257 -0
  22. data/test/Oscar_Wilde_The_Happy_Prince_en.txt +79 -0
  23. data/test/Oscar_Wilde_The_Happy_Prince_en.wrong +0 -0
  24. metadata +26 -26
  25. data/test/test-doc-3-pages.doc +0 -0
  26. data/test/test-doc-3-pages.docx +0 -0
  27. data/test/test-doc-3-pages.docx.7z +0 -0
  28. data/test/test-doc-3-pages.docx.rar +0 -0
  29. data/test/test-doc-3-pages.docx.zip +0 -0
  30. data/test/test-doc-3-pages.html +0 -279
  31. data/test/test-doc-3-pages.odt +0 -0
  32. data/test/test-doc-3-pages.pdf +0 -0
  33. data/test/test-doc-3-pages.rtf +0 -339
  34. data/test/test-doc-3-pages.txt +0 -125
  35. data/test/test-doc-3-pages.wrong +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0bd64f8e12d0c7bb3a75893738e30af616e4bcc5b958b18853b35363823b5ef
4
- data.tar.gz: d87505b025bd924a545e2f6cbd9071958d65f993a234092f65b3cb7e108b16b1
3
+ metadata.gz: f412c60bccb3fca934efecbc7922af07b41297423e6a2c4fbe04b8110a0e22e8
4
+ data.tar.gz: 4b281d9c93de0955e90b1a9d500213b1fa7103c449d72354caaa3d5d29702ff5
5
5
  SHA512:
6
- metadata.gz: af0708407f3b4546424e1666926c248cdb9fe0813ede2dd642d099836282d2f608d8d47edcd5cd513cef9b3ead231c192f6b815ec7721eb141b6820f561d0f30
7
- data.tar.gz: 6a5969118ff6a6141aaaec8989e38670f75a817afa230636ed84f9f2a4e7c1f160ee569664f940e1355cfc74dff56e30370ca993fbb38d2a7139c17f56858acf
6
+ metadata.gz: b545143db8d5fd51fb4c5c3d95d76b8122576e26e6587bb0b8c1ec62303e7e7bc5509554132ba9e65d47294fd2fa7c803a01634529c8e4ca8ecf9d0b3f1a392c
7
+ data.tar.gz: 3ff648cca05fe842e97db5e5153399bcfcd34f98141cd47ad5ca511de588de67c080778b643cba9be372178a6d6fc497552082bece168d3e10be6788382c8426
data/Gemfile CHANGED
@@ -3,6 +3,8 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in total_compressor.gemspec
4
4
  gemspec
5
5
 
6
+ ruby '>= 3.2'
7
+
6
8
  gem 'activerecord', '~> 6'
7
9
 
8
10
  gem 'awesome_print'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- act_as_page_extractor (0.6.1)
4
+ act_as_page_extractor (0.6.4)
5
5
  activerecord (~> 6)
6
6
  awesome_print (~> 1)
7
7
  docsplit (~> 0)
@@ -80,13 +80,13 @@ GEM
80
80
  zeitwerk (2.6.17)
81
81
 
82
82
  PLATFORMS
83
- ruby
83
+ x86_64-linux
84
84
 
85
85
  DEPENDENCIES
86
86
  act_as_page_extractor!
87
87
  activerecord (~> 6)
88
88
  awesome_print
89
- bundler (~> 1)
89
+ bundler (~> 2)
90
90
  byebug
91
91
  docsplit
92
92
  filesize
@@ -98,5 +98,8 @@ DEPENDENCIES
98
98
  simplecov
99
99
  total_compressor
100
100
 
101
+ RUBY VERSION
102
+ ruby 3.2.0p0
103
+
101
104
  BUNDLED WITH
102
- 1.17.3
105
+ 2.4.1
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ['lib']
19
19
 
20
- spec.add_development_dependency 'bundler', '~> 1'
20
+ spec.add_development_dependency 'bundler', '~> 2'
21
21
  spec.add_development_dependency 'rake', '~> 12', '>= 12.3.3'
22
22
  spec.add_development_dependency 'byebug', '~> 0'
23
23
  spec.add_development_dependency 'rspec', '~> 0'
@@ -1,3 +1,10 @@
1
+ # Fix: https://github.com/documentcloud/docsplit/pull/159
2
+ class File
3
+ class << self
4
+ alias_method :exists?, :exist?
5
+ end
6
+ end
7
+
1
8
  module ActAsPageExtractor
2
9
  def extract_pages
3
10
  convert_to_pdf
@@ -10,7 +17,7 @@ module ActAsPageExtractor
10
17
  else
11
18
  if timeout_wrapper{ Docsplit.extract_pdf(@document_path, output: @tmp_dir)}
12
19
  pdf_path = (@document_path.split('.')[0..-2] + ['pdf']).join('.')
13
- pdf_path if File.exists?(pdf_path)
20
+ pdf_path if File.exist?(pdf_path)
14
21
  end
15
22
  end
16
23
  end
@@ -10,7 +10,7 @@ module ActAsPageExtractor
10
10
  end
11
11
 
12
12
  def remove_files
13
- FileUtils::rm_rf(pdf_path) if File.exists?(pdf_path.to_s)
13
+ FileUtils::rm_rf(pdf_path) if File.exist?(pdf_path.to_s)
14
14
  end
15
15
 
16
16
  def self.start_extraction
@@ -11,7 +11,7 @@ module ActAsPageExtractor
11
11
  end
12
12
 
13
13
  def save_to_db
14
- self.update_attributes(page_extraction_state: EXTRACTING_STATES[:extracting])
14
+ self.update(page_extraction_state: EXTRACTING_STATES[:extracting])
15
15
  ExtractedPage.transaction do
16
16
  @pdf_pages&.times&.each do |pdf_page|
17
17
  page_filename = "#{@tmp_dir}/#{@document_filename.split('.').first}_#{(pdf_page + 1).to_s}.txt"
@@ -32,7 +32,7 @@ module ActAsPageExtractor
32
32
  page_extraction_doctype: @document_path&.split('.')&.last,
33
33
  page_extraction_filesize: Filesize.from("#{File.size(@document_path)} B").pretty
34
34
  })
35
- self.update_attributes(updated_attributes)
35
+ self.update(updated_attributes)
36
36
  end
37
37
 
38
38
  def cleanup_pages
@@ -1,5 +1,5 @@
1
1
  # :nocov:
2
2
  module ActAsPageExtractor
3
- VERSION = "0.6.2"
3
+ VERSION = "0.6.4"
4
4
  end
5
5
  # :nocov:
@@ -85,13 +85,13 @@ module ActAsPageExtractor
85
85
 
86
86
  def create_pdf_dir
87
87
  if save_as_pdf
88
- FileUtils::mkdir_p(pdf_storage) unless File.exists?(pdf_storage)
88
+ FileUtils::mkdir_p(pdf_storage) unless File.exist?(pdf_storage)
89
89
  end
90
90
  end
91
91
 
92
92
  def create_tmp_dir
93
93
  @tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
94
- FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)
94
+ FileUtils::mkdir_p(@tmp_dir) unless File.exist?(@tmp_dir)
95
95
  end
96
96
 
97
97
  def copy_document
@@ -4,24 +4,24 @@ require 'act_as_page_extractor'
4
4
  describe ActAsPageExtractor do
5
5
  context 'correct extraction' do
6
6
  [
7
- 'test-doc-3-pages.docx',
8
- 'test-doc-3-pages.doc',
9
- 'test-doc-3-pages.pdf',
10
- 'test-doc-3-pages.rtf',
11
- 'test-doc-3-pages.odt',
12
- 'test-doc-3-pages.html',
13
- 'test-doc-3-pages.txt',
14
- 'test-doc-3-pages.docx.zip',
15
- 'test-doc-3-pages.docx.rar',
16
- 'test-doc-3-pages.docx.7z'
7
+ 'Oscar_Wilde_The_Happy_Prince_en.docx',
8
+ 'Oscar_Wilde_The_Happy_Prince_en.doc',
9
+ 'Oscar_Wilde_The_Happy_Prince_en.pdf',
10
+ 'Oscar_Wilde_The_Happy_Prince_en.rtf',
11
+ 'Oscar_Wilde_The_Happy_Prince_en.odt',
12
+ 'Oscar_Wilde_The_Happy_Prince_en.html',
13
+ 'Oscar_Wilde_The_Happy_Prince_en.txt',
14
+ 'Oscar_Wilde_The_Happy_Prince_en.docx.zip',
15
+ 'Oscar_Wilde_The_Happy_Prince_en.docx.rar',
16
+ 'Oscar_Wilde_The_Happy_Prince_en.docx.7z'
17
17
  ].each do |document|
18
18
  it "extraction valid document #{document}" do
19
19
  book = Book.new({doc_path: document})
20
20
  allow(Book).to receive_message_chain('where') { [book] }
21
21
  ActAsPageExtractor.start_extraction
22
22
  expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
23
- expect(ExtractedPage.array.count).to eq 3
24
- expect(ExtractedPage.array[0][:page]).to match /require \'act_as_page_extractor\/modules\/interface\'/
23
+ expect(ExtractedPage.array.count).to eq 4
24
+ expect(ExtractedPage.array[0][:page]).to match /on a tall column, stood the statue of the Happy Prince/
25
25
  unless document.match /pdf/
26
26
  expect(book.pdf_path).to match /pdf/
27
27
  expect(book.remove_files.count).to eq 1
@@ -33,7 +33,7 @@ describe ActAsPageExtractor do
33
33
 
34
34
  context 'incorrect extraction' do
35
35
  [
36
- 'test-doc-3-pages.wrong',
36
+ 'Oscar_Wilde_The_Happy_Prince_en.wrong',
37
37
  ].each do |document|
38
38
  it "extraction invalid document #{document}" do
39
39
  book = Book.new({doc_path: document})
@@ -60,7 +60,7 @@ class Book
60
60
  array
61
61
  end
62
62
 
63
- def update_attributes params
63
+ def update params
64
64
  params.each do |key, value|
65
65
  instance_eval("self.#{key} = #{value.class == String ? '\'' + value + '\'': value }")
66
66
  end