paperclip-document 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/paperclip/document.rb +4 -6
- data/lib/paperclip/document/attachment_extension.rb +1 -5
- data/lib/paperclip/document/processor.rb +3 -6
- data/lib/paperclip/document/processors.rb +4 -5
- data/lib/paperclip/document/processors/counter.rb +6 -10
- data/lib/paperclip/document/processors/freezer.rb +4 -8
- data/lib/paperclip/document/processors/reader.rb +10 -14
- data/lib/paperclip/document/processors/sketcher.rb +5 -10
- data/lib/paperclip/document/version.rb +1 -1
- metadata +9 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad4fc35c9bbaacd7c35f76007645a0fc1b559b49
|
4
|
+
data.tar.gz: 0446b4d6c51297634e1ecebcacd83ed3010973a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6095f13d9ee3379c9bb2077db885a41131f330c6d8fdc1fffce59e551c86cb6f87f8a1e10462e79c3d85170f0cee5331bb7303616c98b9cfe2f748eef9f4adab
|
7
|
+
data.tar.gz: 75850319470348136ebb14e876390b886445ff0943675d8b7653a0143e6b2c84c3415429975359260685874933470b82c6e83da477b8341818654c51d5e61435
|
data/lib/paperclip/document.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require 'paperclip/document/version'
|
2
|
+
require 'paperclip/document/attachment_extension'
|
3
|
+
require 'docsplit'
|
4
|
+
require 'pathname'
|
5
5
|
|
6
6
|
module Paperclip
|
7
7
|
module Document
|
@@ -15,6 +15,4 @@ module Paperclip
|
|
15
15
|
c.register_processor :freezer, Document::Processors::Freezer
|
16
16
|
c.register_processor :counter, Document::Processors::Counter
|
17
17
|
end
|
18
|
-
|
19
18
|
end
|
20
|
-
|
@@ -1,17 +1,13 @@
|
|
1
1
|
module Paperclip
|
2
|
-
|
3
2
|
class Attachment
|
4
|
-
|
5
3
|
# Returns the content_text of the file as originally extracted. It is stored in the <attachment>_content_text attribute of the model.
|
6
4
|
def content_text
|
7
5
|
instance_read(:content_text)
|
8
6
|
end
|
9
|
-
|
7
|
+
|
10
8
|
# Returns the pages_count of the file as originally computed. It is stored in the <attachment>_pages_count attribute of the model.
|
11
9
|
def pages_count
|
12
10
|
instance_read(:pages_count)
|
13
11
|
end
|
14
|
-
|
15
12
|
end
|
16
|
-
|
17
13
|
end
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
|
-
|
4
3
|
# Main processor
|
5
4
|
class Processor < Paperclip::Processor
|
6
|
-
|
7
5
|
attr_reader :instance, :tmp_dir
|
8
|
-
|
6
|
+
|
9
7
|
def initialize(file, options = {}, attachment = nil)
|
10
8
|
super(file, options, attachment)
|
11
9
|
@instance = @attachment.instance
|
12
|
-
@tmp_dir = Pathname.new(Dir.tmpdir).join(
|
10
|
+
@tmp_dir = Pathname.new(Dir.tmpdir).join('paperclip-document-' + Time.now.to_i.to_s(36) + rand(1_000_000_000).to_s(36))
|
13
11
|
end
|
14
|
-
|
12
|
+
|
15
13
|
def file_path
|
16
14
|
Pathname.new(@file.path)
|
17
15
|
end
|
@@ -19,7 +17,6 @@ module Paperclip
|
|
19
17
|
def basename
|
20
18
|
file_path.basename.to_s.gsub(/\.[^\.]+/, '')
|
21
19
|
end
|
22
|
-
|
23
20
|
end
|
24
21
|
end
|
25
22
|
end
|
@@ -1,11 +1,10 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
autoload :Sketcher,
|
5
|
-
autoload :Reader,
|
6
|
-
autoload :Freezer,
|
7
|
-
autoload :Counter,
|
4
|
+
autoload :Sketcher, 'paperclip/document/processors/sketcher'
|
5
|
+
autoload :Reader, 'paperclip/document/processors/reader'
|
6
|
+
autoload :Freezer, 'paperclip/document/processors/freezer'
|
7
|
+
autoload :Counter, 'paperclip/document/processors/counter'
|
8
8
|
end
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
@@ -1,21 +1,19 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor stores the pages count of the file in the pages count column
|
6
5
|
class Counter < Paperclip::Document::Processor
|
7
|
-
|
8
6
|
attr_accessor :pages_count_column
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
if @options[:pages_count_column].nil?
|
10
|
+
if @options[:pages_count_column].nil? && pages_count_column?
|
13
11
|
@options[:pages_count_column] = default_pages_count_column
|
14
12
|
end
|
15
13
|
@pages_count_column = @options[:pages_count_column]
|
16
14
|
|
17
15
|
unless @pages_count_column
|
18
|
-
raise Paperclip::Error,
|
16
|
+
raise Paperclip::Error, 'No pages count column given'
|
19
17
|
end
|
20
18
|
end
|
21
19
|
|
@@ -26,24 +24,22 @@ module Paperclip
|
|
26
24
|
instance[pages_count_column] = count
|
27
25
|
instance.run_callbacks(:save) { false }
|
28
26
|
|
29
|
-
|
27
|
+
File.open(file.path)
|
30
28
|
end
|
31
|
-
|
29
|
+
|
32
30
|
# Check if a pages count column is present
|
33
31
|
def pages_count_column?
|
34
32
|
expected_column = default_pages_count_column
|
35
|
-
|
33
|
+
@attachment.instance.class.columns.detect do |column|
|
36
34
|
column.name.to_s == expected_column
|
37
35
|
end
|
38
36
|
end
|
39
37
|
|
40
38
|
# Returns the name of the default pages count column
|
41
39
|
def default_pages_count_column
|
42
|
-
@attachment.name.to_s +
|
40
|
+
@attachment.name.to_s + '_pages_count'
|
43
41
|
end
|
44
|
-
|
45
42
|
end
|
46
|
-
|
47
43
|
end
|
48
44
|
end
|
49
45
|
end
|
@@ -1,14 +1,13 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor converts the document to PDF
|
6
5
|
class Freezer < Paperclip::Document::Processor
|
7
6
|
def initialize(file, options = {}, attachment = nil)
|
8
7
|
super
|
9
8
|
@format = options[:format]
|
10
9
|
unless @format == :pdf
|
11
|
-
raise Paperclip::Error,
|
10
|
+
raise Paperclip::Error, 'Valid format (pdf) must be specified'
|
12
11
|
end
|
13
12
|
end
|
14
13
|
|
@@ -19,18 +18,15 @@ module Paperclip
|
|
19
18
|
if pdf_format?
|
20
19
|
destination_file = file_path.to_s
|
21
20
|
else
|
22
|
-
Docsplit.extract_pdf(file_path.to_s, :
|
21
|
+
Docsplit.extract_pdf(file_path.to_s, output: destination_path)
|
23
22
|
end
|
24
|
-
|
23
|
+
File.open(destination_file)
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
26
|
def pdf_format?
|
29
|
-
File.open(file_path,
|
27
|
+
File.open(file_path, 'rb', &:readline).to_s =~ /\A\%PDF-\d+(\.\d+)?$/
|
30
28
|
end
|
31
|
-
|
32
29
|
end
|
33
|
-
|
34
30
|
end
|
35
31
|
end
|
36
32
|
end
|
@@ -1,57 +1,53 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor extracts the OCR text of the file
|
6
5
|
class Reader < Paperclip::Document::Processor
|
7
|
-
|
8
6
|
attr_accessor :clean, :text_column, :language
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
if @options[:text_column].nil?
|
10
|
+
if @options[:text_column].nil? && text_column?
|
13
11
|
@options[:text_column] = default_text_column
|
14
12
|
end
|
15
13
|
@language = @options[:language]
|
16
14
|
@text_column = @options[:text_column]
|
17
15
|
unless @text_column
|
18
|
-
raise Paperclip::Error,
|
16
|
+
raise Paperclip::Error, 'No content text column given'
|
19
17
|
end
|
20
|
-
@clean = (RUBY_VERSION >=
|
18
|
+
@clean = (RUBY_VERSION >= '2.0' ? false : options.key?(:clean) ? !!options[:clean] : true)
|
21
19
|
end
|
22
20
|
|
23
21
|
# Extract the text of the whole document
|
24
22
|
def make
|
25
23
|
destination_path = tmp_dir.to_s
|
26
|
-
options = {output: destination_path, clean: @clean}
|
24
|
+
options = { output: destination_path, clean: @clean }
|
27
25
|
options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
|
28
26
|
Docsplit.extract_text(file_path.to_s, options)
|
29
|
-
|
30
|
-
destination_file = File.join(destination_path, basename +
|
27
|
+
|
28
|
+
destination_file = File.join(destination_path, basename + '.txt')
|
31
29
|
instance = @attachment.instance
|
32
30
|
f = File.open(destination_file)
|
33
31
|
instance[text_column] = f.read
|
34
32
|
instance.run_callbacks(:save) { false }
|
35
33
|
f.close
|
36
34
|
|
37
|
-
|
35
|
+
File.open(file.path)
|
38
36
|
end
|
39
|
-
|
37
|
+
|
40
38
|
# Check if the default text column is present
|
41
39
|
def text_column?
|
42
40
|
expected_column = default_text_column
|
43
|
-
|
41
|
+
instance.class.columns.detect do |column|
|
44
42
|
column.name.to_s == expected_column
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
48
46
|
# Returns the name of the default text column
|
49
47
|
def default_text_column
|
50
|
-
@attachment.name.to_s +
|
48
|
+
@attachment.name.to_s + '_content_text'
|
51
49
|
end
|
52
|
-
|
53
50
|
end
|
54
|
-
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor extracts the first page as a thumbnail
|
6
5
|
class Sketcher < Paperclip::Document::Processor
|
7
|
-
|
8
|
-
attr_accessor :format, :density, :format
|
6
|
+
attr_accessor :format, :density
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
@format
|
10
|
+
@format = (options[:format] || :jpg).to_sym
|
13
11
|
unless [:jpg, :png].include?(@format)
|
14
|
-
raise Paperclip::Error,
|
12
|
+
raise Paperclip::Error, 'Valid format must be specified'
|
15
13
|
end
|
16
14
|
unless @size = options[:size]
|
17
15
|
@density = (options[:density] || 150).to_f
|
@@ -21,7 +19,7 @@ module Paperclip
|
|
21
19
|
# Extract the page
|
22
20
|
def make
|
23
21
|
destination_path = tmp_dir.to_s
|
24
|
-
options = {:
|
22
|
+
options = { output: destination_path, pages: [1], format: [@format] }
|
25
23
|
if @size
|
26
24
|
options[:size] = @size
|
27
25
|
elsif @density
|
@@ -32,12 +30,9 @@ module Paperclip
|
|
32
30
|
rescue
|
33
31
|
raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
|
34
32
|
end
|
35
|
-
|
33
|
+
File.open(File.join(destination_path, basename + "_1.#{@format}"))
|
36
34
|
end
|
37
|
-
|
38
35
|
end
|
39
|
-
|
40
|
-
|
41
36
|
end
|
42
37
|
end
|
43
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paperclip-document
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brice Texier
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paperclip
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '3.1'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '5'
|
22
|
+
version: '5.2'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,21 +29,21 @@ dependencies:
|
|
29
29
|
version: '3.1'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '5'
|
32
|
+
version: '5.2'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: burisu-docsplit
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
|
-
- - "
|
37
|
+
- - ">="
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: 0.7.
|
39
|
+
version: 0.7.9
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
|
-
- - "
|
44
|
+
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: 0.7.
|
46
|
+
version: 0.7.9
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,9 +166,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
168
|
rubyforge_project:
|
169
|
-
rubygems_version: 2.4.5
|
169
|
+
rubygems_version: 2.4.5.1
|
170
170
|
signing_key:
|
171
171
|
specification_version: 4
|
172
172
|
summary: Processors for paperclip
|
173
173
|
test_files: []
|
174
|
-
has_rdoc:
|