act_as_page_extractor 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +11 -11
- data/lib/act_as_page_extractor/modules/validating.rb +1 -1
- data/lib/act_as_page_extractor/version.rb +1 -1
- data/spec/act_as_page_extractor_spec.rb +47 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 150243f19490b089622edb3ca6e265347bcdadab21701de0865a63274b23fc8d
|
|
4
|
+
data.tar.gz: b3f0d0d90d3b035e4b4d9bee313f6084b46a8e686cc5c873ea5413ea3ab9cf50
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2cd0e7c24705e0eb3e646dba87343a06ac23fbaf16d2b1cacaf164046d096b2e65dc8a890bd923b2a59975ecfaacfd58e5e3ddcf61b8cf4610739b465c2af5b7
|
|
7
|
+
data.tar.gz: ab1e2e4e85117d9a4147ecdcd6ff32be761f3a7bbffee19b1041be8e0c9ea8d665f1358468e27549ab493d36b0fed05d2e2e06fdb7fccd1482b56defb459ac49
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
act_as_page_extractor (0.7.2)
|
|
4
|
+
act_as_page_extractor (0.7.3)
|
|
5
5
|
activerecord (~> 7.0)
|
|
6
6
|
amazing_print (~> 1)
|
|
7
7
|
docsplit (~> 0)
|
|
@@ -40,18 +40,18 @@ GEM
|
|
|
40
40
|
benchmark (0.4.1)
|
|
41
41
|
bigdecimal (3.2.2)
|
|
42
42
|
byebug (12.0.0)
|
|
43
|
-
concurrent-ruby (1.3.
|
|
43
|
+
concurrent-ruby (1.3.5)
|
|
44
44
|
connection_pool (2.5.3)
|
|
45
|
-
diff-lcs (1.
|
|
45
|
+
diff-lcs (1.6.2)
|
|
46
46
|
docile (1.4.1)
|
|
47
47
|
docsplit (0.7.6)
|
|
48
48
|
drb (2.2.3)
|
|
49
49
|
filesize (0.2.0)
|
|
50
50
|
hashery (2.1.2)
|
|
51
|
-
i18n (1.14.
|
|
51
|
+
i18n (1.14.7)
|
|
52
52
|
concurrent-ruby (~> 1.0)
|
|
53
53
|
logger (1.7.0)
|
|
54
|
-
minitest (5.25.
|
|
54
|
+
minitest (5.25.5)
|
|
55
55
|
pdf-core (0.4.0)
|
|
56
56
|
pdf-reader (1.4.1)
|
|
57
57
|
Ascii85 (~> 1.0.0)
|
|
@@ -64,19 +64,19 @@ GEM
|
|
|
64
64
|
pdf-core (~> 0.4.0)
|
|
65
65
|
ttfunk (~> 1.4.0)
|
|
66
66
|
rake (12.3.3)
|
|
67
|
-
rspec (3.13.
|
|
67
|
+
rspec (3.13.1)
|
|
68
68
|
rspec-core (~> 3.13.0)
|
|
69
69
|
rspec-expectations (~> 3.13.0)
|
|
70
70
|
rspec-mocks (~> 3.13.0)
|
|
71
|
-
rspec-core (3.13.
|
|
71
|
+
rspec-core (3.13.5)
|
|
72
72
|
rspec-support (~> 3.13.0)
|
|
73
|
-
rspec-expectations (3.13.
|
|
73
|
+
rspec-expectations (3.13.5)
|
|
74
74
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
75
75
|
rspec-support (~> 3.13.0)
|
|
76
|
-
rspec-mocks (3.13.
|
|
76
|
+
rspec-mocks (3.13.5)
|
|
77
77
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
78
78
|
rspec-support (~> 3.13.0)
|
|
79
|
-
rspec-support (3.13.
|
|
79
|
+
rspec-support (3.13.5)
|
|
80
80
|
ruby-rc4 (0.1.5)
|
|
81
81
|
rubyzip (1.3.0)
|
|
82
82
|
securerandom (0.4.1)
|
|
@@ -84,7 +84,7 @@ GEM
|
|
|
84
84
|
docile (~> 1.1)
|
|
85
85
|
simplecov-html (~> 0.11)
|
|
86
86
|
simplecov_json_formatter (~> 0.1)
|
|
87
|
-
simplecov-html (0.
|
|
87
|
+
simplecov-html (0.13.2)
|
|
88
88
|
simplecov_json_formatter (0.1.4)
|
|
89
89
|
timeout (0.4.3)
|
|
90
90
|
total_compressor (0.1.11)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
2
|
require 'act_as_page_extractor'
|
|
3
|
+
require 'tmpdir'
|
|
3
4
|
|
|
4
5
|
describe ActAsPageExtractor do
|
|
5
6
|
context 'correct extraction' do
|
|
@@ -21,9 +22,9 @@ describe ActAsPageExtractor do
|
|
|
21
22
|
ActAsPageExtractor.start_extraction
|
|
22
23
|
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
|
23
24
|
expect(ExtractedPage.array.count).to eq 4
|
|
24
|
-
expect(ExtractedPage.array[0][:page]).to match
|
|
25
|
-
unless document.match
|
|
26
|
-
expect(book.pdf_path).to match
|
|
25
|
+
expect(ExtractedPage.array[0][:page]).to match(/on a tall column, stood the statue of the Happy Prince/)
|
|
26
|
+
unless document.match(/pdf/)
|
|
27
|
+
expect(book.pdf_path).to match(/pdf/)
|
|
27
28
|
expect(book.remove_files.count).to eq 1
|
|
28
29
|
expect(book.pages_extraction_errors).to be_empty
|
|
29
30
|
end
|
|
@@ -79,5 +80,48 @@ describe ActAsPageExtractor do
|
|
|
79
80
|
expect(book.pages_extraction_errors).to match(error_msg)
|
|
80
81
|
end
|
|
81
82
|
end
|
|
83
|
+
|
|
84
|
+
context 'when file is less than 20MB' do
|
|
85
|
+
let(:filename) { 'normal_file.txt' }
|
|
86
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
|
87
|
+
let(:document) { File.join(tmp_dir, filename) }
|
|
88
|
+
|
|
89
|
+
before { build_file(tmp_dir, document, size_mb: 3) }
|
|
90
|
+
|
|
91
|
+
after { File.delete(document) if File.exist?(document) }
|
|
92
|
+
|
|
93
|
+
it 'converts without errors' do
|
|
94
|
+
book = Book.new({ doc_path: filename })
|
|
95
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
|
96
|
+
ActAsPageExtractor.start_extraction
|
|
97
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
|
98
|
+
expect(book.pages_extraction_errors).to eq ""
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
context 'when file is larger than 20MB' do
|
|
103
|
+
let(:filename) { 'large_file.txt' }
|
|
104
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
|
105
|
+
let(:document) { File.join(tmp_dir, filename) }
|
|
106
|
+
|
|
107
|
+
before { build_file(tmp_dir, document, size_mb: 22) }
|
|
108
|
+
|
|
109
|
+
after { File.delete(document) if File.exist?(document) }
|
|
110
|
+
|
|
111
|
+
it 'sets error_filesize state and logs error' do
|
|
112
|
+
book = Book.new({ doc_path: filename })
|
|
113
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
|
114
|
+
ActAsPageExtractor.start_extraction
|
|
115
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:error_filesize]
|
|
116
|
+
expect(book.pages_extraction_errors).to match('error_filesize')
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def build_file(dir, file, size_mb:)
|
|
123
|
+
FileUtils.mkdir_p(dir)
|
|
124
|
+
File.open(file, "w") do |f|
|
|
125
|
+
size_mb.times { f.write("a " * 1024 * 512) }
|
|
82
126
|
end
|
|
83
127
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: act_as_page_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.2
|
|
4
|
+
version: 0.7.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- PhlowerTeam
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-08-
|
|
11
|
+
date: 2025-08-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|