act_as_page_extractor 0.7.1 → 0.7.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a11b311c9575aace2a74e468f35b062c6e444d58e6f62caed99b70bab60703b
4
- data.tar.gz: 9c463a553f4e3490110f46626d2c4c6c0f124b2522c63ef0e7db2acc856b12a2
3
+ metadata.gz: 150243f19490b089622edb3ca6e265347bcdadab21701de0865a63274b23fc8d
4
+ data.tar.gz: b3f0d0d90d3b035e4b4d9bee313f6084b46a8e686cc5c873ea5413ea3ab9cf50
5
5
  SHA512:
6
- metadata.gz: 4db62f37880a270dfe39dcedf5a537b2e5699633709348a030ae2274218478493c994099d38e7ff21729bdf543e681c66b818d2bc321b6b49f1478fa348d00c7
7
- data.tar.gz: e2df6c4f723418d1098b0d0afac87fef847b50a87a766198e13ee42b8ff2cf4fac5e2788f09a7105ee13dbec4d84bfaf5084f9bcae7ad127915ab08d652f9deb
6
+ metadata.gz: 2cd0e7c24705e0eb3e646dba87343a06ac23fbaf16d2b1cacaf164046d096b2e65dc8a890bd923b2a59975ecfaacfd58e5e3ddcf61b8cf4610739b465c2af5b7
7
+ data.tar.gz: ab1e2e4e85117d9a4147ecdcd6ff32be761f3a7bbffee19b1041be8e0c9ea8d665f1358468e27549ab493d36b0fed05d2e2e06fdb7fccd1482b56defb459ac49
data/Gemfile.lock CHANGED
@@ -1,8 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- act_as_page_extractor (0.7.1)
5
- activerecord (~> 6.0)
4
+ act_as_page_extractor (0.7.3)
5
+ activerecord (~> 7.0)
6
6
  amazing_print (~> 1)
7
7
  docsplit (~> 0)
8
8
  filesize (~> 0)
@@ -15,30 +15,43 @@ GEM
15
15
  remote: https://rubygems.org/
16
16
  specs:
17
17
  Ascii85 (1.0.3)
18
- activemodel (6.1.7.8)
19
- activesupport (= 6.1.7.8)
20
- activerecord (6.1.7.8)
21
- activemodel (= 6.1.7.8)
22
- activesupport (= 6.1.7.8)
23
- activesupport (6.1.7.8)
24
- concurrent-ruby (~> 1.0, >= 1.0.2)
18
+ activemodel (7.2.2.2)
19
+ activesupport (= 7.2.2.2)
20
+ activerecord (7.2.2.2)
21
+ activemodel (= 7.2.2.2)
22
+ activesupport (= 7.2.2.2)
23
+ timeout (>= 0.4.0)
24
+ activesupport (7.2.2.2)
25
+ base64
26
+ benchmark (>= 0.3)
27
+ bigdecimal
28
+ concurrent-ruby (~> 1.0, >= 1.3.1)
29
+ connection_pool (>= 2.2.5)
30
+ drb
25
31
  i18n (>= 1.6, < 2)
32
+ logger (>= 1.4.2)
26
33
  minitest (>= 5.1)
27
- tzinfo (~> 2.0)
28
- zeitwerk (~> 2.3)
34
+ securerandom (>= 0.3)
35
+ tzinfo (~> 2.0, >= 2.0.5)
29
36
  afm (0.2.2)
30
37
  amazing_print (1.8.1)
31
38
  awesome_print (1.9.2)
39
+ base64 (0.3.0)
40
+ benchmark (0.4.1)
41
+ bigdecimal (3.2.2)
32
42
  byebug (12.0.0)
33
- concurrent-ruby (1.3.4)
34
- diff-lcs (1.5.1)
43
+ concurrent-ruby (1.3.5)
44
+ connection_pool (2.5.3)
45
+ diff-lcs (1.6.2)
35
46
  docile (1.4.1)
36
47
  docsplit (0.7.6)
48
+ drb (2.2.3)
37
49
  filesize (0.2.0)
38
50
  hashery (2.1.2)
39
- i18n (1.14.5)
51
+ i18n (1.14.7)
40
52
  concurrent-ruby (~> 1.0)
41
- minitest (5.25.1)
53
+ logger (1.7.0)
54
+ minitest (5.25.5)
42
55
  pdf-core (0.4.0)
43
56
  pdf-reader (1.4.1)
44
57
  Ascii85 (~> 1.0.0)
@@ -51,34 +64,35 @@ GEM
51
64
  pdf-core (~> 0.4.0)
52
65
  ttfunk (~> 1.4.0)
53
66
  rake (12.3.3)
54
- rspec (3.13.0)
67
+ rspec (3.13.1)
55
68
  rspec-core (~> 3.13.0)
56
69
  rspec-expectations (~> 3.13.0)
57
70
  rspec-mocks (~> 3.13.0)
58
- rspec-core (3.13.0)
71
+ rspec-core (3.13.5)
59
72
  rspec-support (~> 3.13.0)
60
- rspec-expectations (3.13.2)
73
+ rspec-expectations (3.13.5)
61
74
  diff-lcs (>= 1.2.0, < 2.0)
62
75
  rspec-support (~> 3.13.0)
63
- rspec-mocks (3.13.1)
76
+ rspec-mocks (3.13.5)
64
77
  diff-lcs (>= 1.2.0, < 2.0)
65
78
  rspec-support (~> 3.13.0)
66
- rspec-support (3.13.1)
79
+ rspec-support (3.13.5)
67
80
  ruby-rc4 (0.1.5)
68
81
  rubyzip (1.3.0)
82
+ securerandom (0.4.1)
69
83
  simplecov (0.22.0)
70
84
  docile (~> 1.1)
71
85
  simplecov-html (~> 0.11)
72
86
  simplecov_json_formatter (~> 0.1)
73
- simplecov-html (0.12.3)
87
+ simplecov-html (0.13.2)
74
88
  simplecov_json_formatter (0.1.4)
89
+ timeout (0.4.3)
75
90
  total_compressor (0.1.11)
76
91
  awesome_print (~> 1.1, >= 1.1.0)
77
92
  rubyzip (~> 1.2, >= 1.2.2)
78
93
  ttfunk (1.4.0)
79
94
  tzinfo (2.0.6)
80
95
  concurrent-ruby (~> 1.0)
81
- zeitwerk (2.6.17)
82
96
 
83
97
  PLATFORMS
84
98
  x86_64-linux
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency 'rspec', '~> 0'
25
25
  spec.add_development_dependency 'simplecov', '~> 0'
26
26
 
27
- spec.add_runtime_dependency 'activerecord', '~> 6.0'
27
+ spec.add_runtime_dependency 'activerecord', '~> 7.0'
28
28
  spec.add_runtime_dependency 'amazing_print', '~> 1'
29
29
  spec.add_runtime_dependency 'docsplit', '~> 0' # API for OpenOffice jodconverter (any to pdf)
30
30
  spec.add_runtime_dependency 'pdf_utils', '~> 0' # getting text from pdf
@@ -5,7 +5,7 @@ module ActAsPageExtractor
5
5
 
6
6
  def validate_size
7
7
  mb = 2**20
8
- valid = File.size(@copy_document_path) <= 1*mb
8
+ valid = File.size(@copy_document_path) <= 20*mb
9
9
 
10
10
  unless valid
11
11
  @page_extraction_state = EXTRACTING_STATES[:error_filesize]
@@ -1,5 +1,5 @@
1
1
  # :nocov:
2
2
  module ActAsPageExtractor
3
- VERSION = "0.7.1"
3
+ VERSION = "0.7.3"
4
4
  end
5
5
  # :nocov:
@@ -1,5 +1,6 @@
1
1
  require 'spec_helper'
2
2
  require 'act_as_page_extractor'
3
+ require 'tmpdir'
3
4
 
4
5
  describe ActAsPageExtractor do
5
6
  context 'correct extraction' do
@@ -21,9 +22,9 @@ describe ActAsPageExtractor do
21
22
  ActAsPageExtractor.start_extraction
22
23
  expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
23
24
  expect(ExtractedPage.array.count).to eq 4
24
- expect(ExtractedPage.array[0][:page]).to match /on a tall column, stood the statue of the Happy Prince/
25
- unless document.match /pdf/
26
- expect(book.pdf_path).to match /pdf/
25
+ expect(ExtractedPage.array[0][:page]).to match(/on a tall column, stood the statue of the Happy Prince/)
26
+ unless document.match(/pdf/)
27
+ expect(book.pdf_path).to match(/pdf/)
27
28
  expect(book.remove_files.count).to eq 1
28
29
  expect(book.pages_extraction_errors).to be_empty
29
30
  end
@@ -79,5 +80,48 @@ describe ActAsPageExtractor do
79
80
  expect(book.pages_extraction_errors).to match(error_msg)
80
81
  end
81
82
  end
83
+
84
+ context 'when file is less than 20MB' do
85
+ let(:filename) { 'normal_file.txt' }
86
+ let(:tmp_dir) { File.expand_path("../test/", __dir__) }
87
+ let(:document) { File.join(tmp_dir, filename) }
88
+
89
+ before { build_file(tmp_dir, document, size_mb: 3) }
90
+
91
+ after { File.delete(document) if File.exist?(document) }
92
+
93
+ it 'converts without errors' do
94
+ book = Book.new({ doc_path: filename })
95
+ allow(Book).to receive_message_chain('where') { [book] }
96
+ ActAsPageExtractor.start_extraction
97
+ expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:extracted]
98
+ expect(book.pages_extraction_errors).to eq ""
99
+ end
100
+ end
101
+
102
+ context 'when file is larger than 20MB' do
103
+ let(:filename) { 'large_file.txt' }
104
+ let(:tmp_dir) { File.expand_path("../test/", __dir__) }
105
+ let(:document) { File.join(tmp_dir, filename) }
106
+
107
+ before { build_file(tmp_dir, document, size_mb: 22) }
108
+
109
+ after { File.delete(document) if File.exist?(document) }
110
+
111
+ it 'sets error_filesize state and logs error' do
112
+ book = Book.new({ doc_path: filename })
113
+ allow(Book).to receive_message_chain('where') { [book] }
114
+ ActAsPageExtractor.start_extraction
115
+ expect(book.page_extraction_state).to eq ActAsPageExtractor::EXTRACTING_STATES[:error_filesize]
116
+ expect(book.pages_extraction_errors).to match('error_filesize')
117
+ end
118
+ end
119
+ end
120
+ end
121
+
122
+ def build_file(dir, file, size_mb:)
123
+ FileUtils.mkdir_p(dir)
124
+ File.open(file, "w") do |f|
125
+ size_mb.times { f.write("a " * 1024 * 512) }
82
126
  end
83
127
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: act_as_page_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - PhlowerTeam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-23 00:00:00.000000000 Z
11
+ date: 2025-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -98,14 +98,14 @@ dependencies:
98
98
  requirements:
99
99
  - - "~>"
100
100
  - !ruby/object:Gem::Version
101
- version: '6.0'
101
+ version: '7.0'
102
102
  type: :runtime
103
103
  prerelease: false
104
104
  version_requirements: !ruby/object:Gem::Requirement
105
105
  requirements:
106
106
  - - "~>"
107
107
  - !ruby/object:Gem::Version
108
- version: '6.0'
108
+ version: '7.0'
109
109
  - !ruby/object:Gem::Dependency
110
110
  name: amazing_print
111
111
  requirement: !ruby/object:Gem::Requirement