act_as_page_extractor 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 150243f19490b089622edb3ca6e265347bcdadab21701de0865a63274b23fc8d
|
4
|
+
data.tar.gz: b3f0d0d90d3b035e4b4d9bee313f6084b46a8e686cc5c873ea5413ea3ab9cf50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cd0e7c24705e0eb3e646dba87343a06ac23fbaf16d2b1cacaf164046d096b2e65dc8a890bd923b2a59975ecfaacfd58e5e3ddcf61b8cf4610739b465c2af5b7
|
7
|
+
data.tar.gz: ab1e2e4e85117d9a4147ecdcd6ff32be761f3a7bbffee19b1041be8e0c9ea8d665f1358468e27549ab493d36b0fed05d2e2e06fdb7fccd1482b56defb459ac49
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
act_as_page_extractor (0.7.1)
|
5
|
-
activerecord (~>
|
4
|
+
act_as_page_extractor (0.7.3)
|
5
|
+
activerecord (~> 7.0)
|
6
6
|
amazing_print (~> 1)
|
7
7
|
docsplit (~> 0)
|
8
8
|
filesize (~> 0)
|
@@ -15,30 +15,43 @@ GEM
|
|
15
15
|
remote: https://rubygems.org/
|
16
16
|
specs:
|
17
17
|
Ascii85 (1.0.3)
|
18
|
-
activemodel (
|
19
|
-
activesupport (=
|
20
|
-
activerecord (
|
21
|
-
activemodel (=
|
22
|
-
activesupport (=
|
23
|
-
|
24
|
-
|
18
|
+
activemodel (7.2.2.2)
|
19
|
+
activesupport (= 7.2.2.2)
|
20
|
+
activerecord (7.2.2.2)
|
21
|
+
activemodel (= 7.2.2.2)
|
22
|
+
activesupport (= 7.2.2.2)
|
23
|
+
timeout (>= 0.4.0)
|
24
|
+
activesupport (7.2.2.2)
|
25
|
+
base64
|
26
|
+
benchmark (>= 0.3)
|
27
|
+
bigdecimal
|
28
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
29
|
+
connection_pool (>= 2.2.5)
|
30
|
+
drb
|
25
31
|
i18n (>= 1.6, < 2)
|
32
|
+
logger (>= 1.4.2)
|
26
33
|
minitest (>= 5.1)
|
27
|
-
|
28
|
-
|
34
|
+
securerandom (>= 0.3)
|
35
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
29
36
|
afm (0.2.2)
|
30
37
|
amazing_print (1.8.1)
|
31
38
|
awesome_print (1.9.2)
|
39
|
+
base64 (0.3.0)
|
40
|
+
benchmark (0.4.1)
|
41
|
+
bigdecimal (3.2.2)
|
32
42
|
byebug (12.0.0)
|
33
|
-
concurrent-ruby (1.3.
|
34
|
-
|
43
|
+
concurrent-ruby (1.3.5)
|
44
|
+
connection_pool (2.5.3)
|
45
|
+
diff-lcs (1.6.2)
|
35
46
|
docile (1.4.1)
|
36
47
|
docsplit (0.7.6)
|
48
|
+
drb (2.2.3)
|
37
49
|
filesize (0.2.0)
|
38
50
|
hashery (2.1.2)
|
39
|
-
i18n (1.14.
|
51
|
+
i18n (1.14.7)
|
40
52
|
concurrent-ruby (~> 1.0)
|
41
|
-
|
53
|
+
logger (1.7.0)
|
54
|
+
minitest (5.25.5)
|
42
55
|
pdf-core (0.4.0)
|
43
56
|
pdf-reader (1.4.1)
|
44
57
|
Ascii85 (~> 1.0.0)
|
@@ -51,34 +64,35 @@ GEM
|
|
51
64
|
pdf-core (~> 0.4.0)
|
52
65
|
ttfunk (~> 1.4.0)
|
53
66
|
rake (12.3.3)
|
54
|
-
rspec (3.13.
|
67
|
+
rspec (3.13.1)
|
55
68
|
rspec-core (~> 3.13.0)
|
56
69
|
rspec-expectations (~> 3.13.0)
|
57
70
|
rspec-mocks (~> 3.13.0)
|
58
|
-
rspec-core (3.13.
|
71
|
+
rspec-core (3.13.5)
|
59
72
|
rspec-support (~> 3.13.0)
|
60
|
-
rspec-expectations (3.13.
|
73
|
+
rspec-expectations (3.13.5)
|
61
74
|
diff-lcs (>= 1.2.0, < 2.0)
|
62
75
|
rspec-support (~> 3.13.0)
|
63
|
-
rspec-mocks (3.13.
|
76
|
+
rspec-mocks (3.13.5)
|
64
77
|
diff-lcs (>= 1.2.0, < 2.0)
|
65
78
|
rspec-support (~> 3.13.0)
|
66
|
-
rspec-support (3.13.
|
79
|
+
rspec-support (3.13.5)
|
67
80
|
ruby-rc4 (0.1.5)
|
68
81
|
rubyzip (1.3.0)
|
82
|
+
securerandom (0.4.1)
|
69
83
|
simplecov (0.22.0)
|
70
84
|
docile (~> 1.1)
|
71
85
|
simplecov-html (~> 0.11)
|
72
86
|
simplecov_json_formatter (~> 0.1)
|
73
|
-
simplecov-html (0.
|
87
|
+
simplecov-html (0.13.2)
|
74
88
|
simplecov_json_formatter (0.1.4)
|
89
|
+
timeout (0.4.3)
|
75
90
|
total_compressor (0.1.11)
|
76
91
|
awesome_print (~> 1.1, >= 1.1.0)
|
77
92
|
rubyzip (~> 1.2, >= 1.2.2)
|
78
93
|
ttfunk (1.4.0)
|
79
94
|
tzinfo (2.0.6)
|
80
95
|
concurrent-ruby (~> 1.0)
|
81
|
-
zeitwerk (2.6.17)
|
82
96
|
|
83
97
|
PLATFORMS
|
84
98
|
x86_64-linux
|
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency 'rspec', '~> 0'
|
25
25
|
spec.add_development_dependency 'simplecov', '~> 0'
|
26
26
|
|
27
|
-
spec.add_runtime_dependency 'activerecord', '~>
|
27
|
+
spec.add_runtime_dependency 'activerecord', '~> 7.0'
|
28
28
|
spec.add_runtime_dependency 'amazing_print', '~> 1'
|
29
29
|
spec.add_runtime_dependency 'docsplit', '~> 0' # API for OpenOffice jodconverter (any to pdf)
|
30
30
|
spec.add_runtime_dependency 'pdf_utils', '~> 0' # getting text from pdf
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'act_as_page_extractor'
|
3
|
+
require 'tmpdir'
|
3
4
|
|
4
5
|
describe ActAsPageExtractor do
|
5
6
|
context 'correct extraction' do
|
@@ -21,9 +22,9 @@ describe ActAsPageExtractor do
|
|
21
22
|
ActAsPageExtractor.start_extraction
|
22
23
|
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
23
24
|
expect(ExtractedPage.array.count).to eq 4
|
24
|
-
expect(ExtractedPage.array[0][:page]).to match
|
25
|
-
unless document.match
|
26
|
-
expect(book.pdf_path).to match
|
25
|
+
expect(ExtractedPage.array[0][:page]).to match(/on a tall column, stood the statue of the Happy Prince/)
|
26
|
+
unless document.match(/pdf/)
|
27
|
+
expect(book.pdf_path).to match(/pdf/)
|
27
28
|
expect(book.remove_files.count).to eq 1
|
28
29
|
expect(book.pages_extraction_errors).to be_empty
|
29
30
|
end
|
@@ -79,5 +80,48 @@ describe ActAsPageExtractor do
|
|
79
80
|
expect(book.pages_extraction_errors).to match(error_msg)
|
80
81
|
end
|
81
82
|
end
|
83
|
+
|
84
|
+
context 'when file is less than 20MB' do
|
85
|
+
let(:filename) { 'normal_file.txt' }
|
86
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
87
|
+
let(:document) { File.join(tmp_dir, filename) }
|
88
|
+
|
89
|
+
before { build_file(tmp_dir, document, size_mb: 3) }
|
90
|
+
|
91
|
+
after { File.delete(document) if File.exist?(document) }
|
92
|
+
|
93
|
+
it 'converts without errors' do
|
94
|
+
book = Book.new({ doc_path: filename })
|
95
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
96
|
+
ActAsPageExtractor.start_extraction
|
97
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
|
98
|
+
expect(book.pages_extraction_errors).to eq ""
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
context 'when file is larger than 20MB' do
|
103
|
+
let(:filename) { 'large_file.txt' }
|
104
|
+
let(:tmp_dir) { File.expand_path("../test/", __dir__) }
|
105
|
+
let(:document) { File.join(tmp_dir, filename) }
|
106
|
+
|
107
|
+
before { build_file(tmp_dir, document, size_mb: 22) }
|
108
|
+
|
109
|
+
after { File.delete(document) if File.exist?(document) }
|
110
|
+
|
111
|
+
it 'sets error_filesize state and logs error' do
|
112
|
+
book = Book.new({ doc_path: filename })
|
113
|
+
allow(Book).to receive_message_chain('where') { [book] }
|
114
|
+
ActAsPageExtractor.start_extraction
|
115
|
+
expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:error_filesize]
|
116
|
+
expect(book.pages_extraction_errors).to match('error_filesize')
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
def build_file(dir, file, size_mb:)
|
123
|
+
FileUtils.mkdir_p(dir)
|
124
|
+
File.open(file, "w") do |f|
|
125
|
+
size_mb.times { f.write("a " * 1024 * 512) }
|
82
126
|
end
|
83
127
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: act_as_page_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.1
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- PhlowerTeam
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-08-
|
11
|
+
date: 2025-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -98,14 +98,14 @@ dependencies:
|
|
98
98
|
requirements:
|
99
99
|
- - "~>"
|
100
100
|
- !ruby/object:Gem::Version
|
101
|
-
version: '
|
101
|
+
version: '7.0'
|
102
102
|
type: :runtime
|
103
103
|
prerelease: false
|
104
104
|
version_requirements: !ruby/object:Gem::Requirement
|
105
105
|
requirements:
|
106
106
|
- - "~>"
|
107
107
|
- !ruby/object:Gem::Version
|
108
|
-
version: '
|
108
|
+
version: '7.0'
|
109
109
|
- !ruby/object:Gem::Dependency
|
110
110
|
name: amazing_print
|
111
111
|
requirement: !ruby/object:Gem::Requirement
|