paperclip-document 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/paperclip/document.rb +4 -6
- data/lib/paperclip/document/attachment_extension.rb +1 -5
- data/lib/paperclip/document/processor.rb +3 -6
- data/lib/paperclip/document/processors.rb +4 -5
- data/lib/paperclip/document/processors/counter.rb +6 -10
- data/lib/paperclip/document/processors/freezer.rb +4 -8
- data/lib/paperclip/document/processors/reader.rb +10 -14
- data/lib/paperclip/document/processors/sketcher.rb +5 -10
- data/lib/paperclip/document/version.rb +1 -1
- metadata +9 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad4fc35c9bbaacd7c35f76007645a0fc1b559b49
|
4
|
+
data.tar.gz: 0446b4d6c51297634e1ecebcacd83ed3010973a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6095f13d9ee3379c9bb2077db885a41131f330c6d8fdc1fffce59e551c86cb6f87f8a1e10462e79c3d85170f0cee5331bb7303616c98b9cfe2f748eef9f4adab
|
7
|
+
data.tar.gz: 75850319470348136ebb14e876390b886445ff0943675d8b7653a0143e6b2c84c3415429975359260685874933470b82c6e83da477b8341818654c51d5e61435
|
data/lib/paperclip/document.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require 'paperclip/document/version'
|
2
|
+
require 'paperclip/document/attachment_extension'
|
3
|
+
require 'docsplit'
|
4
|
+
require 'pathname'
|
5
5
|
|
6
6
|
module Paperclip
|
7
7
|
module Document
|
@@ -15,6 +15,4 @@ module Paperclip
|
|
15
15
|
c.register_processor :freezer, Document::Processors::Freezer
|
16
16
|
c.register_processor :counter, Document::Processors::Counter
|
17
17
|
end
|
18
|
-
|
19
18
|
end
|
20
|
-
|
@@ -1,17 +1,13 @@
|
|
1
1
|
module Paperclip
|
2
|
-
|
3
2
|
class Attachment
|
4
|
-
|
5
3
|
# Returns the content_text of the file as originally extracted, and lives in the <attachment>_content_text attribute of the model.
|
6
4
|
def content_text
|
7
5
|
instance_read(:content_text)
|
8
6
|
end
|
9
|
-
|
7
|
+
|
10
8
|
# Returns the pages_count of the file as originally computed, and lives in the <attachment>_pages_count attribute of the model.
|
11
9
|
def pages_count
|
12
10
|
instance_read(:pages_count)
|
13
11
|
end
|
14
|
-
|
15
12
|
end
|
16
|
-
|
17
13
|
end
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
|
-
|
4
3
|
# Main processor
|
5
4
|
class Processor < Paperclip::Processor
|
6
|
-
|
7
5
|
attr_reader :instance, :tmp_dir
|
8
|
-
|
6
|
+
|
9
7
|
def initialize(file, options = {}, attachment = nil)
|
10
8
|
super(file, options, attachment)
|
11
9
|
@instance = @attachment.instance
|
12
|
-
@tmp_dir = Pathname.new(Dir.tmpdir).join(
|
10
|
+
@tmp_dir = Pathname.new(Dir.tmpdir).join('paperclip-document-' + Time.now.to_i.to_s(36) + rand(1_000_000_000).to_s(36))
|
13
11
|
end
|
14
|
-
|
12
|
+
|
15
13
|
def file_path
|
16
14
|
Pathname.new(@file.path)
|
17
15
|
end
|
@@ -19,7 +17,6 @@ module Paperclip
|
|
19
17
|
def basename
|
20
18
|
file_path.basename.to_s.gsub(/\.[^\.]+/, '')
|
21
19
|
end
|
22
|
-
|
23
20
|
end
|
24
21
|
end
|
25
22
|
end
|
@@ -1,11 +1,10 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
autoload :Sketcher,
|
5
|
-
autoload :Reader,
|
6
|
-
autoload :Freezer,
|
7
|
-
autoload :Counter,
|
4
|
+
autoload :Sketcher, 'paperclip/document/processors/sketcher'
|
5
|
+
autoload :Reader, 'paperclip/document/processors/reader'
|
6
|
+
autoload :Freezer, 'paperclip/document/processors/freezer'
|
7
|
+
autoload :Counter, 'paperclip/document/processors/counter'
|
8
8
|
end
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
@@ -1,21 +1,19 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor extract the OCR text of the file
|
6
5
|
class Counter < Paperclip::Document::Processor
|
7
|
-
|
8
6
|
attr_accessor :pages_count_column
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
if @options[:pages_count_column].nil?
|
10
|
+
if @options[:pages_count_column].nil? && pages_count_column?
|
13
11
|
@options[:pages_count_column] = default_pages_count_column
|
14
12
|
end
|
15
13
|
@pages_count_column = @options[:pages_count_column]
|
16
14
|
|
17
15
|
unless @pages_count_column
|
18
|
-
raise Paperclip::Error,
|
16
|
+
raise Paperclip::Error, 'No pages count column given'
|
19
17
|
end
|
20
18
|
end
|
21
19
|
|
@@ -26,24 +24,22 @@ module Paperclip
|
|
26
24
|
instance[pages_count_column] = count
|
27
25
|
instance.run_callbacks(:save) { false }
|
28
26
|
|
29
|
-
|
27
|
+
File.open(file.path)
|
30
28
|
end
|
31
|
-
|
29
|
+
|
32
30
|
# Check if a pages count column is present
|
33
31
|
def pages_count_column?
|
34
32
|
expected_column = default_pages_count_column
|
35
|
-
|
33
|
+
@attachment.instance.class.columns.detect do |column|
|
36
34
|
column.name.to_s == expected_column
|
37
35
|
end
|
38
36
|
end
|
39
37
|
|
40
38
|
# Returns the name of the default pages count column
|
41
39
|
def default_pages_count_column
|
42
|
-
@attachment.name.to_s +
|
40
|
+
@attachment.name.to_s + '_pages_count'
|
43
41
|
end
|
44
|
-
|
45
42
|
end
|
46
|
-
|
47
43
|
end
|
48
44
|
end
|
49
45
|
end
|
@@ -1,14 +1,13 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor converts document to PDF
|
6
5
|
class Freezer < Paperclip::Document::Processor
|
7
6
|
def initialize(file, options = {}, attachment = nil)
|
8
7
|
super
|
9
8
|
@format = options[:format]
|
10
9
|
unless @format == :pdf
|
11
|
-
raise Paperclip::Error,
|
10
|
+
raise Paperclip::Error, 'Valid format (pdf) must be specified'
|
12
11
|
end
|
13
12
|
end
|
14
13
|
|
@@ -19,18 +18,15 @@ module Paperclip
|
|
19
18
|
if pdf_format?
|
20
19
|
destination_file = file_path.to_s
|
21
20
|
else
|
22
|
-
Docsplit.extract_pdf(file_path.to_s, :
|
21
|
+
Docsplit.extract_pdf(file_path.to_s, output: destination_path)
|
23
22
|
end
|
24
|
-
|
23
|
+
File.open(destination_file)
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
26
|
def pdf_format?
|
29
|
-
File.open(file_path,
|
27
|
+
File.open(file_path, 'rb', &:readline).to_s =~ /\A\%PDF-\d+(\.\d+)?$/
|
30
28
|
end
|
31
|
-
|
32
29
|
end
|
33
|
-
|
34
30
|
end
|
35
31
|
end
|
36
32
|
end
|
@@ -1,57 +1,53 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor extract the OCR text of the file
|
6
5
|
class Reader < Paperclip::Document::Processor
|
7
|
-
|
8
6
|
attr_accessor :clean, :text_column, :language
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
if @options[:text_column].nil?
|
10
|
+
if @options[:text_column].nil? && text_column?
|
13
11
|
@options[:text_column] = default_text_column
|
14
12
|
end
|
15
13
|
@language = @options[:language]
|
16
14
|
@text_column = @options[:text_column]
|
17
15
|
unless @text_column
|
18
|
-
raise Paperclip::Error,
|
16
|
+
raise Paperclip::Error, 'No content text column given'
|
19
17
|
end
|
20
|
-
@clean = (RUBY_VERSION >=
|
18
|
+
@clean = (RUBY_VERSION >= '2.0' ? false : options.key?(:clean) ? !!options[:clean] : true)
|
21
19
|
end
|
22
20
|
|
23
21
|
# Extract the text of all the document
|
24
22
|
def make
|
25
23
|
destination_path = tmp_dir.to_s
|
26
|
-
options = {output: destination_path, clean: @clean}
|
24
|
+
options = { output: destination_path, clean: @clean }
|
27
25
|
options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
|
28
26
|
Docsplit.extract_text(file_path.to_s, options)
|
29
|
-
|
30
|
-
destination_file = File.join(destination_path, basename +
|
27
|
+
|
28
|
+
destination_file = File.join(destination_path, basename + '.txt')
|
31
29
|
instance = @attachment.instance
|
32
30
|
f = File.open(destination_file)
|
33
31
|
instance[text_column] = f.read
|
34
32
|
instance.run_callbacks(:save) { false }
|
35
33
|
f.close
|
36
34
|
|
37
|
-
|
35
|
+
File.open(file.path)
|
38
36
|
end
|
39
|
-
|
37
|
+
|
40
38
|
# Check if the default text column is present
|
41
39
|
def text_column?
|
42
40
|
expected_column = default_text_column
|
43
|
-
|
41
|
+
instance.class.columns.detect do |column|
|
44
42
|
column.name.to_s == expected_column
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
48
46
|
# Returns the name of the default text column
|
49
47
|
def default_text_column
|
50
|
-
@attachment.name.to_s +
|
48
|
+
@attachment.name.to_s + '_content_text'
|
51
49
|
end
|
52
|
-
|
53
50
|
end
|
54
|
-
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
@@ -1,17 +1,15 @@
|
|
1
1
|
module Paperclip
|
2
2
|
module Document
|
3
3
|
module Processors
|
4
|
-
|
5
4
|
# This processor extract first page as thumbnail
|
6
5
|
class Sketcher < Paperclip::Document::Processor
|
7
|
-
|
8
|
-
attr_accessor :format, :density, :format
|
6
|
+
attr_accessor :format, :density
|
9
7
|
|
10
8
|
def initialize(file, options = {}, attachment = nil)
|
11
9
|
super(file, options, attachment)
|
12
|
-
@format
|
10
|
+
@format = (options[:format] || :jpg).to_sym
|
13
11
|
unless [:jpg, :png].include?(@format)
|
14
|
-
raise Paperclip::Error,
|
12
|
+
raise Paperclip::Error, 'Valid format must be specified'
|
15
13
|
end
|
16
14
|
unless @size = options[:size]
|
17
15
|
@density = (options[:density] || 150).to_f
|
@@ -21,7 +19,7 @@ module Paperclip
|
|
21
19
|
# Extract the page
|
22
20
|
def make
|
23
21
|
destination_path = tmp_dir.to_s
|
24
|
-
options = {:
|
22
|
+
options = { output: destination_path, pages: [1], format: [@format] }
|
25
23
|
if @size
|
26
24
|
options[:size] = @size
|
27
25
|
elsif @density
|
@@ -32,12 +30,9 @@ module Paperclip
|
|
32
30
|
rescue
|
33
31
|
raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
|
34
32
|
end
|
35
|
-
|
33
|
+
File.open(File.join(destination_path, basename + "_1.#{@format}"))
|
36
34
|
end
|
37
|
-
|
38
35
|
end
|
39
|
-
|
40
|
-
|
41
36
|
end
|
42
37
|
end
|
43
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paperclip-document
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brice Texier
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paperclip
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '3.1'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '5'
|
22
|
+
version: '5.2'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,21 +29,21 @@ dependencies:
|
|
29
29
|
version: '3.1'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '5'
|
32
|
+
version: '5.2'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: burisu-docsplit
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
|
-
- - "
|
37
|
+
- - ">="
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: 0.7.
|
39
|
+
version: 0.7.9
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
|
-
- - "
|
44
|
+
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: 0.7.
|
46
|
+
version: 0.7.9
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,9 +166,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
168
|
rubyforge_project:
|
169
|
-
rubygems_version: 2.4.5
|
169
|
+
rubygems_version: 2.4.5.1
|
170
170
|
signing_key:
|
171
171
|
specification_version: 4
|
172
172
|
summary: Processors for paperclip
|
173
173
|
test_files: []
|
174
|
-
has_rdoc:
|