act_as_page_extractor 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +11 -11
- data/lib/act_as_page_extractor/modules/validating.rb +1 -1
- data/lib/act_as_page_extractor/version.rb +1 -1
- data/spec/act_as_page_extractor_spec.rb +47 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 150243f19490b089622edb3ca6e265347bcdadab21701de0865a63274b23fc8d
|
4
|
+
data.tar.gz: b3f0d0d90d3b035e4b4d9bee313f6084b46a8e686cc5c873ea5413ea3ab9cf50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cd0e7c24705e0eb3e646dba87343a06ac23fbaf16d2b1cacaf164046d096b2e65dc8a890bd923b2a59975ecfaacfd58e5e3ddcf61b8cf4610739b465c2af5b7
|
7
|
+
data.tar.gz: ab1e2e4e85117d9a4147ecdcd6ff32be761f3a7bbffee19b1041be8e0c9ea8d665f1358468e27549ab493d36b0fed05d2e2e06fdb7fccd1482b56defb459ac49
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
act_as_page_extractor (0.7.2)
|
4
|
+
act_as_page_extractor (0.7.3)
|
5
5
|
activerecord (~> 7.0)
|
6
6
|
amazing_print (~> 1)
|
7
7
|
docsplit (~> 0)
|
@@ -40,18 +40,18 @@ GEM
|
|
40
40
|
benchmark (0.4.1)
|
41
41
|
bigdecimal (3.2.2)
|
42
42
|
byebug (12.0.0)
|
43
|
-
concurrent-ruby (1.3.
|
43
|
+
concurrent-ruby (1.3.5)
|
44
44
|
connection_pool (2.5.3)
|
45
|
-
diff-lcs (1.
|
45
|
+
diff-lcs (1.6.2)
|
46
46
|
docile (1.4.1)
|
47
47
|
docsplit (0.7.6)
|
48
48
|
drb (2.2.3)
|
49
49
|
filesize (0.2.0)
|
50
50
|
hashery (2.1.2)
|
51
|
-
i18n (1.14.
|
51
|
+
i18n (1.14.7)
|
52
52
|
concurrent-ruby (~> 1.0)
|
53
53
|
logger (1.7.0)
|
54
|
-
minitest (5.25.
|
54
|
+
minitest (5.25.5)
|
55
55
|
pdf-core (0.4.0)
|
56
56
|
pdf-reader (1.4.1)
|
57
57
|
Ascii85 (~> 1.0.0)
|
@@ -64,19 +64,19 @@ GEM
|
|
64
64
|
pdf-core (~> 0.4.0)
|
65
65
|
ttfunk (~> 1.4.0)
|
66
66
|
rake (12.3.3)
|
67
|
-
rspec (3.13.
|
67
|
+
rspec (3.13.1)
|
68
68
|
rspec-core (~> 3.13.0)
|
69
69
|
rspec-expectations (~> 3.13.0)
|
70
70
|
rspec-mocks (~> 3.13.0)
|
71
|
-
rspec-core (3.13.
|
71
|
+
rspec-core (3.13.5)
|
72
72
|
rspec-support (~> 3.13.0)
|
73
|
-
rspec-expectations (3.13.
|
73
|
+
rspec-expectations (3.13.5)
|
74
74
|
diff-lcs (>= 1.2.0, < 2.0)
|
75
75
|
rspec-support (~> 3.13.0)
|
76
|
-
rspec-mocks (3.13.
|
76
|
+
rspec-mocks (3.13.5)
|
77
77
|
diff-lcs (>= 1.2.0, < 2.0)
|
78
78
|
rspec-support (~> 3.13.0)
|
79
|
-
rspec-support (3.13.
|
79
|
+
rspec-support (3.13.5)
|
80
80
|
ruby-rc4 (0.1.5)
|
81
81
|
rubyzip (1.3.0)
|
82
82
|
securerandom (0.4.1)
|
@@ -84,7 +84,7 @@ GEM
|
|
84
84
|
docile (~> 1.1)
|
85
85
|
simplecov-html (~> 0.11)
|
86
86
|
simplecov_json_formatter (~> 0.1)
|
87
|
-
simplecov-html (0.
|
87
|
+
simplecov-html (0.13.2)
|
88
88
|
simplecov_json_formatter (0.1.4)
|
89
89
|
timeout (0.4.3)
|
90
90
|
total_compressor (0.1.11)
|
data/spec/act_as_page_extractor_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'act_as_page_extractor'
|
3
|
+
require 'tmpdir'
|
3
4
|
|
4
5
|
describe ActAsPageExtractor do
|
5
6
|
context 'correct extraction' do
|
@@ -21,9 +22,9 @@ describe ActAsPageExtractor do
|
|
21
22
|
ActAsPageExtractor.start_extraction
|
22
23
|
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
23
24
|
expect(ExtractedPage.array.count).to eq 4
|
24
|
-
expect(ExtractedPage.array[0][:page]).to match
|
25
|
-
unless document.match
|
26
|
-
expect(book.pdf_path).to match
|
25
|
+
expect(ExtractedPage.array[0][:page]).to match(/on a tall column, stood the statue of the Happy Prince/)
|
26
|
+
unless document.match(/pdf/)
|
27
|
+
expect(book.pdf_path).to match(/pdf/)
|
27
28
|
expect(book.remove_files.count).to eq 1
|
28
29
|
expect(book.pages_extraction_errors).to be_empty
|
29
30
|
end
|
@@ -79,5 +80,48 @@ describe ActAsPageExtractor do
|
|
79
80
|
expect(book.pages_extraction_errors).to match(error_msg)
|
80
81
|
end
|
81
82
|
end
|
83
|
+
|
84
|
+
context 'when file is less than 20MB' do
|
85
|
+
let(:filename) { 'normal_file.txt' }
|
86
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
87
|
+
let(:document) { File.join(tmp_dir, filename) }
|
88
|
+
|
89
|
+
before { build_file(tmp_dir, document, size_mb: 3) }
|
90
|
+
|
91
|
+
after { File.delete(document) if File.exist?(document) }
|
92
|
+
|
93
|
+
it 'converts without errors' do
|
94
|
+
book = Book.new({ doc_path: filename })
|
95
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
96
|
+
ActAsPageExtractor.start_extraction
|
97
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
98
|
+
expect(book.pages_extraction_errors).to eq ""
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
context 'when file is larger than 20MB' do
|
103
|
+
let(:filename) { 'large_file.txt' }
|
104
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
105
|
+
let(:document) { File.join(tmp_dir, filename) }
|
106
|
+
|
107
|
+
before { build_file(tmp_dir, document, size_mb: 22) }
|
108
|
+
|
109
|
+
after { File.delete(document) if File.exist?(document) }
|
110
|
+
|
111
|
+
it 'sets error_filesize state and logs error' do
|
112
|
+
book = Book.new({ doc_path: filename })
|
113
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
114
|
+
ActAsPageExtractor.start_extraction
|
115
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:error_filesize]
|
116
|
+
expect(book.pages_extraction_errors).to match('error_filesize')
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
def build_file(dir, file, size_mb:)
|
123
|
+
FileUtils.mkdir_p(dir)
|
124
|
+
File.open(file, "w") do |f|
|
125
|
+
size_mb.times { f.write("a " * 1024 * 512) }
|
82
126
|
end
|
83
127
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: act_as_page_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.2
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- PhlowerTeam
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-08-
|
11
|
+
date: 2025-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|