paperclip-document 0.0.2 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e34d0f58cb3b703bf031a5bd79ea8e44ec7c1a54
4
- data.tar.gz: 71f1f58a05a92bb9b9356e7ccd9ffc5756749ed3
3
+ metadata.gz: 0eea80135b657f2b47ecc5948d4443be118cec19
4
+ data.tar.gz: f9b90dc80e759fb9809388ef49e1d0f1bfb30d3d
5
5
  SHA512:
6
- metadata.gz: b025b1102d126c9de8996907fcff766727a37f9883c1517faad68ca95fb4ebc5fe5017995ee354934d182028bb942d11a9a40dc3da741d044104a05372ad7f72
7
- data.tar.gz: 9461dd9fc91488397191adf30611e28a7d3d442f60b328d064a884aa040cadbcc5d9ba6f8279ed7b740ea2fb9fe836dd2e3eb6ff0e1dbde6e41fc16788ef24a9
6
+ metadata.gz: ede250c619b01cfa830a5b99a48d7c6153173244134782a1b53902b1f3d8b9e6f00c0ab036d5ccc660db4b7d6a5e5ec025afb27f8c5e642bae44cfedd2e0118a
7
+ data.tar.gz: f0e8f980c2eca47d43d19334f2119a0f6c211ed4df1e550c43086a5540238a5adb106c9091f1fcf54aa3a4d7be2111c3e9e75653292a166e173cefc40951bd49
@@ -1,46 +1,20 @@
1
1
  require "paperclip/document/version"
2
+ require "paperclip/document/attachment_extension"
2
3
  require "docsplit"
3
4
  require "pathname"
4
5
 
5
6
  module Paperclip
6
- # Main processor
7
- class DocumentProcessor < Processor
8
-
9
- attr_reader :instance, :tmp_dir
10
-
11
- def initialize(file, options = {}, attachment = nil)
12
- super(file, options, attachment)
13
- @instance = @attachment.instance
14
- # @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document", instance.class.name, attachment.name.to_s, basename, Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
15
- # @tmp_dir = Rails.root.join("paperclip-document", instance.class.name, attachment.name.to_s, basename, Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
16
- @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document-" + rand(1_000_000_000).to_s(36) + "-" + rand(1_000_000).to_s(36) + "-" + Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
17
- end
18
-
19
- def file_path
20
- Pathname.new(@file.path)
21
- end
22
-
23
- def basename
24
- file_path.basename.to_s.gsub(/\.[^\.]+/, '')
25
- end
26
-
7
+ module Document
8
+ autoload :Processor, 'paperclip/document/processor'
9
+ autoload :Processors, 'paperclip/document/processors'
27
10
  end
28
11
 
29
- class Attachment
30
-
31
- # Returns the content_text of the file as originally extracted, and lives in the <attachment>_content_text attribute of the model.
32
- def content_text
33
- instance_read(:content_text)
34
- end
35
-
36
- # Returns the pages_count of the file as originally computed, and lives in the <attachment>_pages_count attribute of the model.
37
- def pages_count
38
- instance_read(:pages_count)
39
- end
40
-
12
+ configure do |c|
13
+ c.register_processor :sketcher, Document::Processors::Sketcher
14
+ c.register_processor :reader, Document::Processors::Reader
15
+ c.register_processor :freezer, Document::Processors::Freezer
16
+ c.register_processor :counter, Document::Processors::Counter
41
17
  end
42
-
43
18
 
44
19
  end
45
20
 
46
- require "paperclip/document/processors"
@@ -0,0 +1,17 @@
1
+ module Paperclip
2
+
3
+ class Attachment
4
+
5
+ # Returns the content_text of the file as originally extracted; it lives in the <attachment>_content_text attribute of the model.
6
+ def content_text
7
+ instance_read(:content_text)
8
+ end
9
+
10
+ # Returns the pages_count of the file as originally computed; it lives in the <attachment>_pages_count attribute of the model.
11
+ def pages_count
12
+ instance_read(:pages_count)
13
+ end
14
+
15
+ end
16
+
17
+ end
@@ -0,0 +1,25 @@
1
+ module Paperclip
2
+ module Document
3
+
4
+ # Main processor
5
+ class Processor < Paperclip::Processor
6
+
7
+ attr_reader :instance, :tmp_dir
8
+
9
+ def initialize(file, options = {}, attachment = nil)
10
+ super(file, options, attachment)
11
+ @instance = @attachment.instance
12
+ @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document-" + Time.now.to_i.to_s(36) + rand(1_000_000_000).to_s(36))
13
+ end
14
+
15
+ def file_path
16
+ Pathname.new(@file.path)
17
+ end
18
+
19
+ def basename
20
+ file_path.basename.to_s.gsub(/\.[^\.]+/, '')
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,4 +1,11 @@
1
- require "paperclip/document/processors/sketcher"
2
- require "paperclip/document/processors/reader"
3
- require "paperclip/document/processors/freezer"
4
- require "paperclip/document/processors/counter"
1
+ module Paperclip
2
+ module Document
3
+ module Processors
4
+ autoload :Sketcher, "paperclip/document/processors/sketcher"
5
+ autoload :Reader, "paperclip/document/processors/reader"
6
+ autoload :Freezer, "paperclip/document/processors/freezer"
7
+ autoload :Counter, "paperclip/document/processors/counter"
8
+ end
9
+ end
10
+ end
11
+
@@ -1,45 +1,49 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor computes the pages count of the file
6
+ class Counter < Paperclip::Document::Processor
7
+
8
+ attr_accessor :pages_count_column
9
+
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ if @options[:pages_count_column].nil? and pages_count_column?
13
+ @options[:pages_count_column] = default_pages_count_column
14
+ end
15
+ @pages_count_column = @options[:pages_count_column]
16
+
17
+ unless @pages_count_column
18
+ raise Paperclip::Error, "No pages count column given"
19
+ end
20
+ end
21
+
22
+ # Extract the pages count of the whole document
23
+ def make
24
+ count = Docsplit.extract_length(file_path.to_s)
25
+
26
+ instance[pages_count_column] = count
27
+ instance.run_callbacks(:save) { false }
28
+
29
+ return file
30
+ end
31
+
32
+ # Check if a pages count column is present
33
+ def pages_count_column?
34
+ expected_column = default_pages_count_column
35
+ return @attachment.instance.class.columns.detect do |column|
36
+ column.name.to_s == expected_column
37
+ end
38
+ end
39
+
40
+ # Returns the name of the default pages count column
41
+ def default_pages_count_column
42
+ @attachment.name.to_s + "_pages_count"
43
+ end
2
44
 
3
- # This processor extract the OCR text of the file
4
- class Counter < DocumentProcessor
5
-
6
- attr_accessor :pages_count_column
7
-
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- if @options[:pages_count_column].nil? and pages_count_column?
11
- @options[:pages_count_column] = default_pages_count_column
12
- end
13
- @pages_count_column = @options[:pages_count_column]
14
-
15
- unless @pages_count_column
16
- raise Paperclip::Error, "No pages count column given"
17
45
  end
18
- end
19
-
20
- # Extract the pages count of all the document
21
- def make
22
- count = Docsplit.extract_length(file_path.to_s)
23
-
24
- instance[pages_count_column] = count
25
- instance.run_callbacks(:save) { false }
26
46
 
27
- return file
28
47
  end
29
-
30
- # Check if a pages count column is present
31
- def pages_count_column?
32
- expected_column = default_pages_count_column
33
- return @attachment.instance.class.columns.detect do |column|
34
- column.name.to_s == expected_column
35
- end
36
- end
37
-
38
- # Returns the name of the default pages count column
39
- def default_pages_count_column
40
- @attachment.name.to_s + "_pages_count"
41
- end
42
-
43
48
  end
44
-
45
49
  end
@@ -1,38 +1,36 @@
1
- require 'filemagic'
2
-
3
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor converts document to PDF
6
+ class Freezer < Paperclip::Document::Processor
7
+ def initialize(file, options = {}, attachment = nil)
8
+ super
9
+ @format = options[:format]
10
+ unless @format == :pdf
11
+ raise Paperclip::Error, "Valid format (pdf) must be specified"
12
+ end
13
+ end
14
+
15
+ # Convert the document to pdf
16
+ def make
17
+ destination_path = tmp_dir.to_s
18
+ destination_file = File.join(destination_path, basename + ".#{@format}")
19
+ if pdf_format?
20
+ destination_file = file_path.to_s
21
+ else
22
+ Docsplit.extract_pdf(file_path.to_s, :output => destination_path)
23
+ end
24
+ return File.open(destination_file)
25
+ end
26
+
27
+
28
+ def pdf_format?
29
+ File.open(file_path, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
30
+ end
4
31
 
5
- # This processor extract first page as thumbnail
6
- class Freezer < DocumentProcessor
7
- def initialize(file, options = {}, attachment = nil)
8
- super
9
- @format = options[:format]
10
- unless @format == :pdf
11
- raise Paperclip::Error, "Valid format (pdf) must be specified"
12
32
  end
13
- end
14
33
 
15
- # Convert the document to pdf
16
- def make
17
- destination_path = tmp_dir.to_s
18
- destination_file = File.join(destination_path, basename + ".#{@format}")
19
- if pdf_format?
20
- destination_file = file_path.to_s
21
- else
22
- Docsplit.extract_pdf(file_path.to_s, :output => destination_path)
23
- end
24
- return File.open(destination_file)
25
34
  end
26
-
27
-
28
- def pdf_format?
29
- file_magic = FileMagic.new
30
- type = file_magic.file(file_path.to_s)
31
- file_magic.close
32
- type =~ /pdf/i
33
- end
34
-
35
35
  end
36
-
37
-
38
36
  end
@@ -1,53 +1,57 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor extracts the OCR text of the file
6
+ class Reader < Paperclip::Document::Processor
7
+
8
+ attr_accessor :clean, :text_column, :language
9
+
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ if @options[:text_column].nil? and text_column?
13
+ @options[:text_column] = default_text_column
14
+ end
15
+ @language = @options[:language]
16
+ @text_column = @options[:text_column]
17
+ unless @text_column
18
+ raise Paperclip::Error, "No content text column given"
19
+ end
20
+ @clean = (RUBY_VERSION >= "2.0" ? false : options.has_key?(:clean) ? !!options[:clean] : true)
21
+ end
22
+
23
+ # Extract the text of the whole document
24
+ def make
25
+ destination_path = tmp_dir.to_s
26
+ options = {output: destination_path, clean: @clean}
27
+ options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
28
+ Docsplit.extract_text(file_path.to_s, options)
29
+
30
+ destination_file = File.join(destination_path, basename + ".txt")
31
+ instance = @attachment.instance
32
+ f = File.open(destination_file)
33
+ instance[text_column] = f.read
34
+ instance.run_callbacks(:save) { false }
35
+ f.close
36
+
37
+ return file
38
+ end
39
+
40
+ # Check if the default text column is present
41
+ def text_column?
42
+ expected_column = default_text_column
43
+ return instance.class.columns.detect do |column|
44
+ column.name.to_s == expected_column
45
+ end
46
+ end
47
+
48
+ # Returns the name of the default text column
49
+ def default_text_column
50
+ @attachment.name.to_s + "_content_text"
51
+ end
2
52
 
3
- # This processor extract the OCR text of the file
4
- class Reader < DocumentProcessor
5
-
6
- attr_accessor :clean, :text_column, :language
7
-
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- if @options[:text_column].nil? and text_column?
11
- @options[:text_column] = default_text_column
12
- end
13
- @language = @options[:language]
14
- @text_column = @options[:text_column]
15
- unless @text_column
16
- raise Paperclip::Error, "No content text column given"
17
- end
18
- @clean = (RUBY_VERSION >= "2.0" ? false : options.has_key?(:clean) ? !!options[:clean] : true)
19
- end
20
-
21
- # Extract the text of all the document
22
- def make
23
- destination_path = tmp_dir.to_s
24
- options = {output: destination_path, clean: @clean}
25
- options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
26
- Docsplit.extract_text(file_path.to_s, options)
27
-
28
- destination_file = File.join(destination_path, basename + ".txt")
29
- instance = @attachment.instance
30
- f = File.open(destination_file)
31
- instance[text_column] = f.read
32
- instance.run_callbacks(:save) { false }
33
- f.close
34
-
35
- return file
36
- end
37
-
38
- # Check if the default text column is present
39
- def text_column?
40
- expected_column = default_text_column
41
- return instance.class.columns.detect do |column|
42
- column.name.to_s == expected_column
43
53
  end
44
- end
45
54
 
46
- # Returns the name of the default text column
47
- def default_text_column
48
- @attachment.name.to_s + "_content_text"
49
55
  end
50
-
51
56
  end
52
-
53
57
  end
@@ -1,39 +1,43 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
2
4
 
3
- # This processor extract first page as thumbnail
4
- class Sketcher < DocumentProcessor
5
+ # This processor extracts the first page as a thumbnail
6
+ class Sketcher < Paperclip::Document::Processor
5
7
 
6
- attr_accessor :format, :density, :format
8
+ attr_accessor :format, :density, :format
7
9
 
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- @format = (options[:format] || :jpg).to_sym
11
- unless [:jpg, :png].include?(@format)
12
- raise Paperclip::Error, "Valid format must be specified"
13
- end
14
- unless @size = options[:size]
15
- @density = (options[:density] || 150).to_f
16
- end
17
- end
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ @format = (options[:format] || :jpg).to_sym
13
+ unless [:jpg, :png].include?(@format)
14
+ raise Paperclip::Error, "Valid format must be specified"
15
+ end
16
+ unless @size = options[:size]
17
+ @density = (options[:density] || 150).to_f
18
+ end
19
+ end
18
20
 
19
- # Extract the page
20
- def make
21
- destination_path = tmp_dir.to_s
22
- options = {:output => destination_path, :pages => [1], :format => [@format]}
23
- if @size
24
- options[:size] = @size
25
- elsif @density
26
- options[:density] = @density
27
- end
28
- Docsplit.extract_images(file_path.to_s, options)
29
- begin
30
- rescue
31
- raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
32
- end
33
- return File.open(File.join(destination_path, basename + "_1.#{@format}"))
34
- end
21
+ # Extract the page
22
+ def make
23
+ destination_path = tmp_dir.to_s
24
+ options = {:output => destination_path, :pages => [1], :format => [@format]}
25
+ if @size
26
+ options[:size] = @size
27
+ elsif @density
28
+ options[:density] = @density
29
+ end
30
+ Docsplit.extract_images(file_path.to_s, options)
31
+ begin
32
+ rescue
33
+ raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
34
+ end
35
+ return File.open(File.join(destination_path, basename + "_1.#{@format}"))
36
+ end
35
37
 
36
- end
38
+ end
37
39
 
38
40
 
41
+ end
42
+ end
39
43
  end
@@ -1,5 +1,5 @@
1
1
  module Paperclip
2
2
  module Document
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,73 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paperclip-document
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brice Texier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: paperclip
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3.1'
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: '5'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - ~>
27
+ - - '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '3.1'
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: '5'
27
33
  - !ruby/object:Gem::Dependency
28
- name: docsplit
34
+ name: burisu-docsplit
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - ~>
37
+ - - '>='
32
38
  - !ruby/object:Gem::Version
33
- version: 0.7.2
39
+ version: 0.7.6
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
- - - ~>
44
+ - - '>='
39
45
  - !ruby/object:Gem::Version
40
- version: 0.7.2
46
+ version: 0.7.6
41
47
  - !ruby/object:Gem::Dependency
42
- name: ruby-filemagic
48
+ name: bundler
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
51
  - - ~>
46
52
  - !ruby/object:Gem::Version
47
- version: 0.4.2
48
- type: :runtime
53
+ version: '1.3'
54
+ type: :development
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
58
  - - ~>
53
59
  - !ruby/object:Gem::Version
54
- version: 0.4.2
60
+ version: '1.3'
55
61
  - !ruby/object:Gem::Dependency
56
- name: bundler
62
+ name: rake
57
63
  requirement: !ruby/object:Gem::Requirement
58
64
  requirements:
59
- - - ~>
65
+ - - '>='
60
66
  - !ruby/object:Gem::Version
61
- version: '1.3'
67
+ version: '0'
62
68
  type: :development
63
69
  prerelease: false
64
70
  version_requirements: !ruby/object:Gem::Requirement
65
71
  requirements:
66
- - - ~>
72
+ - - '>='
67
73
  - !ruby/object:Gem::Version
68
- version: '1.3'
74
+ version: '0'
69
75
  - !ruby/object:Gem::Dependency
70
- name: rake
76
+ name: rails
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '3.2'
82
+ - - <
83
+ - !ruby/object:Gem::Version
84
+ version: '5'
85
+ type: :development
86
+ prerelease: false
87
+ version_requirements: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '3.2'
92
+ - - <
93
+ - !ruby/object:Gem::Version
94
+ version: '5'
95
+ - !ruby/object:Gem::Dependency
96
+ name: coveralls
71
97
  requirement: !ruby/object:Gem::Requirement
72
98
  requirements:
73
99
  - - '>='
@@ -118,6 +144,8 @@ files:
118
144
  - LICENSE.txt
119
145
  - README.md
120
146
  - lib/paperclip/document.rb
147
+ - lib/paperclip/document/attachment_extension.rb
148
+ - lib/paperclip/document/processor.rb
121
149
  - lib/paperclip/document/processors.rb
122
150
  - lib/paperclip/document/processors/counter.rb
123
151
  - lib/paperclip/document/processors/freezer.rb