act_as_page_extractor 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +7 -4
- data/act_as_page_extractor.gemspec +1 -1
- data/lib/act_as_page_extractor/modules/extracting.rb +8 -1
- data/lib/act_as_page_extractor/modules/interface.rb +1 -1
- data/lib/act_as_page_extractor/version.rb +1 -1
- data/lib/act_as_page_extractor.rb +2 -2
- data/spec/act_as_page_extractor_spec.rb +13 -13
- data/test/Oscar_Wilde_The_Happy_Prince_en.doc +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.7z +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.rar +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.zip +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.html +395 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.odt +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.pdf +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.rtf +257 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.txt +79 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.wrong +0 -0
- metadata +26 -26
- data/test/test-doc-3-pages.doc +0 -0
- data/test/test-doc-3-pages.docx +0 -0
- data/test/test-doc-3-pages.docx.7z +0 -0
- data/test/test-doc-3-pages.docx.rar +0 -0
- data/test/test-doc-3-pages.docx.zip +0 -0
- data/test/test-doc-3-pages.html +0 -279
- data/test/test-doc-3-pages.odt +0 -0
- data/test/test-doc-3-pages.pdf +0 -0
- data/test/test-doc-3-pages.rtf +0 -339
- data/test/test-doc-3-pages.txt +0 -125
- data/test/test-doc-3-pages.wrong +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f412c60bccb3fca934efecbc7922af07b41297423e6a2c4fbe04b8110a0e22e8
|
4
|
+
data.tar.gz: 4b281d9c93de0955e90b1a9d500213b1fa7103c449d72354caaa3d5d29702ff5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b545143db8d5fd51fb4c5c3d95d76b8122576e26e6587bb0b8c1ec62303e7e7bc5509554132ba9e65d47294fd2fa7c803a01634529c8e4ca8ecf9d0b3f1a392c
|
7
|
+
data.tar.gz: 3ff648cca05fe842e97db5e5153399bcfcd34f98141cd47ad5ca511de588de67c080778b643cba9be372178a6d6fc497552082bece168d3e10be6788382c8426
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
act_as_page_extractor (0.6.3)
|
4
|
+
act_as_page_extractor (0.6.4)
|
5
5
|
activerecord (~> 6)
|
6
6
|
awesome_print (~> 1)
|
7
7
|
docsplit (~> 0)
|
@@ -80,13 +80,13 @@ GEM
|
|
80
80
|
zeitwerk (2.6.17)
|
81
81
|
|
82
82
|
PLATFORMS
|
83
|
-
|
83
|
+
x86_64-linux
|
84
84
|
|
85
85
|
DEPENDENCIES
|
86
86
|
act_as_page_extractor!
|
87
87
|
activerecord (~> 6)
|
88
88
|
awesome_print
|
89
|
-
bundler (~>
|
89
|
+
bundler (~> 2)
|
90
90
|
byebug
|
91
91
|
docsplit
|
92
92
|
filesize
|
@@ -98,5 +98,8 @@ DEPENDENCIES
|
|
98
98
|
simplecov
|
99
99
|
total_compressor
|
100
100
|
|
101
|
+
RUBY VERSION
|
102
|
+
ruby 3.2.0p0
|
103
|
+
|
101
104
|
BUNDLED WITH
|
102
|
-
|
105
|
+
2.4.1
|
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
|
-
spec.add_development_dependency 'bundler', '~>
|
20
|
+
spec.add_development_dependency 'bundler', '~> 2'
|
21
21
|
spec.add_development_dependency 'rake', '~> 12', '>= 12.3.3'
|
22
22
|
spec.add_development_dependency 'byebug', '~> 0'
|
23
23
|
spec.add_development_dependency 'rspec', '~> 0'
|
@@ -1,3 +1,10 @@
|
|
1
|
+
# Fix: https://github.com/documentcloud/docsplit/pull/159
|
2
|
+
class File
|
3
|
+
class << self
|
4
|
+
alias_method :exists?, :exist?
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
1
8
|
module ActAsPageExtractor
|
2
9
|
def extract_pages
|
3
10
|
convert_to_pdf
|
@@ -10,7 +17,7 @@ module ActAsPageExtractor
|
|
10
17
|
else
|
11
18
|
if timeout_wrapper{ Docsplit.extract_pdf(@document_path, output: @tmp_dir)}
|
12
19
|
pdf_path = (@document_path.split('.')[0..-2] + ['pdf']).join('.')
|
13
|
-
pdf_path if File.exists?(pdf_path)
|
20
|
+
pdf_path if File.exist?(pdf_path)
|
14
21
|
end
|
15
22
|
end
|
16
23
|
end
|
@@ -85,13 +85,13 @@ module ActAsPageExtractor
|
|
85
85
|
|
86
86
|
def create_pdf_dir
|
87
87
|
if save_as_pdf
|
88
|
-
FileUtils::mkdir_p(pdf_storage) unless File.exists?(pdf_storage)
|
88
|
+
FileUtils::mkdir_p(pdf_storage) unless File.exist?(pdf_storage)
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
92
|
def create_tmp_dir
|
93
93
|
@tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
|
94
|
-
FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)
|
94
|
+
FileUtils::mkdir_p(@tmp_dir) unless File.exist?(@tmp_dir)
|
95
95
|
end
|
96
96
|
|
97
97
|
def copy_document
|
@@ -4,24 +4,24 @@ require 'act_as_page_extractor'
|
|
4
4
|
describe ActAsPageExtractor do
|
5
5
|
context 'correct extraction' do
|
6
6
|
[
|
7
|
-
'
|
8
|
-
'
|
9
|
-
'
|
10
|
-
'
|
11
|
-
'
|
12
|
-
'
|
13
|
-
'
|
14
|
-
'
|
15
|
-
'
|
16
|
-
'
|
7
|
+
'Oscar_Wilde_The_Happy_Prince_en.docx',
|
8
|
+
'Oscar_Wilde_The_Happy_Prince_en.doc',
|
9
|
+
'Oscar_Wilde_The_Happy_Prince_en.pdf',
|
10
|
+
'Oscar_Wilde_The_Happy_Prince_en.rtf',
|
11
|
+
'Oscar_Wilde_The_Happy_Prince_en.odt',
|
12
|
+
'Oscar_Wilde_The_Happy_Prince_en.html',
|
13
|
+
'Oscar_Wilde_The_Happy_Prince_en.txt',
|
14
|
+
'Oscar_Wilde_The_Happy_Prince_en.docx.zip',
|
15
|
+
'Oscar_Wilde_The_Happy_Prince_en.docx.rar',
|
16
|
+
'Oscar_Wilde_The_Happy_Prince_en.docx.7z'
|
17
17
|
].each do |document|
|
18
18
|
it "extraction valid document #{document}" do
|
19
19
|
book = Book.new({doc_path: document})
|
20
20
|
allow(Book).to receive_message_chain('where') { [book] }
|
21
21
|
ActAsPageExtractor.start_extraction
|
22
22
|
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
23
|
-
expect(ExtractedPage.array.count).to eq
|
24
|
-
expect(ExtractedPage.array[0][:page]).to match /
|
23
|
+
expect(ExtractedPage.array.count).to eq 4
|
24
|
+
expect(ExtractedPage.array[0][:page]).to match /on a tall column, stood the statue of the Happy Prince/
|
25
25
|
unless document.match /pdf/
|
26
26
|
expect(book.pdf_path).to match /pdf/
|
27
27
|
expect(book.remove_files.count).to eq 1
|
@@ -33,7 +33,7 @@ describe ActAsPageExtractor do
|
|
33
33
|
|
34
34
|
context 'incorrect extraction' do
|
35
35
|
[
|
36
|
-
'
|
36
|
+
'Oscar_Wilde_The_Happy_Prince_en.wrong',
|
37
37
|
].each do |document|
|
38
38
|
it "extraction invalid document #{document}" do
|
39
39
|
book = Book.new({doc_path: document})
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|