paperclip-document 0.0.2 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e34d0f58cb3b703bf031a5bd79ea8e44ec7c1a54
4
- data.tar.gz: 71f1f58a05a92bb9b9356e7ccd9ffc5756749ed3
3
+ metadata.gz: 0eea80135b657f2b47ecc5948d4443be118cec19
4
+ data.tar.gz: f9b90dc80e759fb9809388ef49e1d0f1bfb30d3d
5
5
  SHA512:
6
- metadata.gz: b025b1102d126c9de8996907fcff766727a37f9883c1517faad68ca95fb4ebc5fe5017995ee354934d182028bb942d11a9a40dc3da741d044104a05372ad7f72
7
- data.tar.gz: 9461dd9fc91488397191adf30611e28a7d3d442f60b328d064a884aa040cadbcc5d9ba6f8279ed7b740ea2fb9fe836dd2e3eb6ff0e1dbde6e41fc16788ef24a9
6
+ metadata.gz: ede250c619b01cfa830a5b99a48d7c6153173244134782a1b53902b1f3d8b9e6f00c0ab036d5ccc660db4b7d6a5e5ec025afb27f8c5e642bae44cfedd2e0118a
7
+ data.tar.gz: f0e8f980c2eca47d43d19334f2119a0f6c211ed4df1e550c43086a5540238a5adb106c9091f1fcf54aa3a4d7be2111c3e9e75653292a166e173cefc40951bd49
@@ -1,46 +1,20 @@
1
1
  require "paperclip/document/version"
2
+ require "paperclip/document/attachment_extension"
2
3
  require "docsplit"
3
4
  require "pathname"
4
5
 
5
6
  module Paperclip
6
- # Main processor
7
- class DocumentProcessor < Processor
8
-
9
- attr_reader :instance, :tmp_dir
10
-
11
- def initialize(file, options = {}, attachment = nil)
12
- super(file, options, attachment)
13
- @instance = @attachment.instance
14
- # @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document", instance.class.name, attachment.name.to_s, basename, Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
15
- # @tmp_dir = Rails.root.join("paperclip-document", instance.class.name, attachment.name.to_s, basename, Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
16
- @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document-" + rand(1_000_000_000).to_s(36) + "-" + rand(1_000_000).to_s(36) + "-" + Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36))
17
- end
18
-
19
- def file_path
20
- Pathname.new(@file.path)
21
- end
22
-
23
- def basename
24
- file_path.basename.to_s.gsub(/\.[^\.]+/, '')
25
- end
26
-
7
+ module Document
8
+ autoload :Processor, 'paperclip/document/processor'
9
+ autoload :Processors, 'paperclip/document/processors'
27
10
  end
28
11
 
29
- class Attachment
30
-
31
- # Returns the content_text of the file as originally extracted, and lives in the <attachment>_content_text attribute of the model.
32
- def content_text
33
- instance_read(:content_text)
34
- end
35
-
36
- # Returns the pages_count of the file as originally computed, and lives in the <attachment>_pages_count attribute of the model.
37
- def pages_count
38
- instance_read(:pages_count)
39
- end
40
-
12
+ configure do |c|
13
+ c.register_processor :sketcher, Document::Processors::Sketcher
14
+ c.register_processor :reader, Document::Processors::Reader
15
+ c.register_processor :freezer, Document::Processors::Freezer
16
+ c.register_processor :counter, Document::Processors::Counter
41
17
  end
42
-
43
18
 
44
19
  end
45
20
 
46
- require "paperclip/document/processors"
@@ -0,0 +1,17 @@
1
+ module Paperclip
2
+
3
+ class Attachment
4
+
5
+ # Returns the content_text of the file as originally extracted, and lives in the <attachment>_content_text attribute of the model.
6
+ def content_text
7
+ instance_read(:content_text)
8
+ end
9
+
10
+ # Returns the pages_count of the file as originally computed, and lives in the <attachment>_pages_count attribute of the model.
11
+ def pages_count
12
+ instance_read(:pages_count)
13
+ end
14
+
15
+ end
16
+
17
+ end
@@ -0,0 +1,25 @@
1
+ module Paperclip
2
+ module Document
3
+
4
+ # Main processor
5
+ class Processor < Paperclip::Processor
6
+
7
+ attr_reader :instance, :tmp_dir
8
+
9
+ def initialize(file, options = {}, attachment = nil)
10
+ super(file, options, attachment)
11
+ @instance = @attachment.instance
12
+ @tmp_dir = Pathname.new(Dir.tmpdir).join("paperclip-document-" + Time.now.to_i.to_s(36) + rand(1_000_000_000).to_s(36))
13
+ end
14
+
15
+ def file_path
16
+ Pathname.new(@file.path)
17
+ end
18
+
19
+ def basename
20
+ file_path.basename.to_s.gsub(/\.[^\.]+/, '')
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,4 +1,11 @@
1
- require "paperclip/document/processors/sketcher"
2
- require "paperclip/document/processors/reader"
3
- require "paperclip/document/processors/freezer"
4
- require "paperclip/document/processors/counter"
1
+ module Paperclip
2
+ module Document
3
+ module Processors
4
+ autoload :Sketcher, "paperclip/document/processors/sketcher"
5
+ autoload :Reader, "paperclip/document/processors/reader"
6
+ autoload :Freezer, "paperclip/document/processors/freezer"
7
+ autoload :Counter, "paperclip/document/processors/counter"
8
+ end
9
+ end
10
+ end
11
+
@@ -1,45 +1,49 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor extract the OCR text of the file
6
+ class Counter < Paperclip::Document::Processor
7
+
8
+ attr_accessor :pages_count_column
9
+
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ if @options[:pages_count_column].nil? and pages_count_column?
13
+ @options[:pages_count_column] = default_pages_count_column
14
+ end
15
+ @pages_count_column = @options[:pages_count_column]
16
+
17
+ unless @pages_count_column
18
+ raise Paperclip::Error, "No pages count column given"
19
+ end
20
+ end
21
+
22
+ # Extract the pages count of all the document
23
+ def make
24
+ count = Docsplit.extract_length(file_path.to_s)
25
+
26
+ instance[pages_count_column] = count
27
+ instance.run_callbacks(:save) { false }
28
+
29
+ return file
30
+ end
31
+
32
+ # Check if a pages count column is present
33
+ def pages_count_column?
34
+ expected_column = default_pages_count_column
35
+ return @attachment.instance.class.columns.detect do |column|
36
+ column.name.to_s == expected_column
37
+ end
38
+ end
39
+
40
+ # Returns the name of the default pages count column
41
+ def default_pages_count_column
42
+ @attachment.name.to_s + "_pages_count"
43
+ end
2
44
 
3
- # This processor extract the OCR text of the file
4
- class Counter < DocumentProcessor
5
-
6
- attr_accessor :pages_count_column
7
-
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- if @options[:pages_count_column].nil? and pages_count_column?
11
- @options[:pages_count_column] = default_pages_count_column
12
- end
13
- @pages_count_column = @options[:pages_count_column]
14
-
15
- unless @pages_count_column
16
- raise Paperclip::Error, "No pages count column given"
17
45
  end
18
- end
19
-
20
- # Extract the pages count of all the document
21
- def make
22
- count = Docsplit.extract_length(file_path.to_s)
23
-
24
- instance[pages_count_column] = count
25
- instance.run_callbacks(:save) { false }
26
46
 
27
- return file
28
47
  end
29
-
30
- # Check if a pages count column is present
31
- def pages_count_column?
32
- expected_column = default_pages_count_column
33
- return @attachment.instance.class.columns.detect do |column|
34
- column.name.to_s == expected_column
35
- end
36
- end
37
-
38
- # Returns the name of the default pages count column
39
- def default_pages_count_column
40
- @attachment.name.to_s + "_pages_count"
41
- end
42
-
43
48
  end
44
-
45
49
  end
@@ -1,38 +1,36 @@
1
- require 'filemagic'
2
-
3
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor converts document to PDF
6
+ class Freezer < Paperclip::Document::Processor
7
+ def initialize(file, options = {}, attachment = nil)
8
+ super
9
+ @format = options[:format]
10
+ unless @format == :pdf
11
+ raise Paperclip::Error, "Valid format (pdf) must be specified"
12
+ end
13
+ end
14
+
15
+ # Convert the document to pdf
16
+ def make
17
+ destination_path = tmp_dir.to_s
18
+ destination_file = File.join(destination_path, basename + ".#{@format}")
19
+ if pdf_format?
20
+ destination_file = file_path.to_s
21
+ else
22
+ Docsplit.extract_pdf(file_path.to_s, :output => destination_path)
23
+ end
24
+ return File.open(destination_file)
25
+ end
26
+
27
+
28
+ def pdf_format?
29
+ File.open(file_path, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
30
+ end
4
31
 
5
- # This processor extract first page as thumbnail
6
- class Freezer < DocumentProcessor
7
- def initialize(file, options = {}, attachment = nil)
8
- super
9
- @format = options[:format]
10
- unless @format == :pdf
11
- raise Paperclip::Error, "Valid format (pdf) must be specified"
12
32
  end
13
- end
14
33
 
15
- # Convert the document to pdf
16
- def make
17
- destination_path = tmp_dir.to_s
18
- destination_file = File.join(destination_path, basename + ".#{@format}")
19
- if pdf_format?
20
- destination_file = file_path.to_s
21
- else
22
- Docsplit.extract_pdf(file_path.to_s, :output => destination_path)
23
- end
24
- return File.open(destination_file)
25
34
  end
26
-
27
-
28
- def pdf_format?
29
- file_magic = FileMagic.new
30
- type = file_magic.file(file_path.to_s)
31
- file_magic.close
32
- type =~ /pdf/i
33
- end
34
-
35
35
  end
36
-
37
-
38
36
  end
@@ -1,53 +1,57 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
4
+
5
+ # This processor extract the OCR text of the file
6
+ class Reader < Paperclip::Document::Processor
7
+
8
+ attr_accessor :clean, :text_column, :language
9
+
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ if @options[:text_column].nil? and text_column?
13
+ @options[:text_column] = default_text_column
14
+ end
15
+ @language = @options[:language]
16
+ @text_column = @options[:text_column]
17
+ unless @text_column
18
+ raise Paperclip::Error, "No content text column given"
19
+ end
20
+ @clean = (RUBY_VERSION >= "2.0" ? false : options.has_key?(:clean) ? !!options[:clean] : true)
21
+ end
22
+
23
+ # Extract the text of all the document
24
+ def make
25
+ destination_path = tmp_dir.to_s
26
+ options = {output: destination_path, clean: @clean}
27
+ options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
28
+ Docsplit.extract_text(file_path.to_s, options)
29
+
30
+ destination_file = File.join(destination_path, basename + ".txt")
31
+ instance = @attachment.instance
32
+ f = File.open(destination_file)
33
+ instance[text_column] = f.read
34
+ instance.run_callbacks(:save) { false }
35
+ f.close
36
+
37
+ return file
38
+ end
39
+
40
+ # Check if the default text column is present
41
+ def text_column?
42
+ expected_column = default_text_column
43
+ return instance.class.columns.detect do |column|
44
+ column.name.to_s == expected_column
45
+ end
46
+ end
47
+
48
+ # Returns the name of the default text column
49
+ def default_text_column
50
+ @attachment.name.to_s + "_content_text"
51
+ end
2
52
 
3
- # This processor extract the OCR text of the file
4
- class Reader < DocumentProcessor
5
-
6
- attr_accessor :clean, :text_column, :language
7
-
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- if @options[:text_column].nil? and text_column?
11
- @options[:text_column] = default_text_column
12
- end
13
- @language = @options[:language]
14
- @text_column = @options[:text_column]
15
- unless @text_column
16
- raise Paperclip::Error, "No content text column given"
17
- end
18
- @clean = (RUBY_VERSION >= "2.0" ? false : options.has_key?(:clean) ? !!options[:clean] : true)
19
- end
20
-
21
- # Extract the text of all the document
22
- def make
23
- destination_path = tmp_dir.to_s
24
- options = {output: destination_path, clean: @clean}
25
- options[:language] = (language.is_a?(Proc) ? language.call(attachment.instance) : language)
26
- Docsplit.extract_text(file_path.to_s, options)
27
-
28
- destination_file = File.join(destination_path, basename + ".txt")
29
- instance = @attachment.instance
30
- f = File.open(destination_file)
31
- instance[text_column] = f.read
32
- instance.run_callbacks(:save) { false }
33
- f.close
34
-
35
- return file
36
- end
37
-
38
- # Check if the default text column is present
39
- def text_column?
40
- expected_column = default_text_column
41
- return instance.class.columns.detect do |column|
42
- column.name.to_s == expected_column
43
53
  end
44
- end
45
54
 
46
- # Returns the name of the default text column
47
- def default_text_column
48
- @attachment.name.to_s + "_content_text"
49
55
  end
50
-
51
56
  end
52
-
53
57
  end
@@ -1,39 +1,43 @@
1
1
  module Paperclip
2
+ module Document
3
+ module Processors
2
4
 
3
- # This processor extract first page as thumbnail
4
- class Sketcher < DocumentProcessor
5
+ # This processor extract first page as thumbnail
6
+ class Sketcher < Paperclip::Document::Processor
5
7
 
6
- attr_accessor :format, :density, :format
8
+ attr_accessor :format, :density, :format
7
9
 
8
- def initialize(file, options = {}, attachment = nil)
9
- super(file, options, attachment)
10
- @format = (options[:format] || :jpg).to_sym
11
- unless [:jpg, :png].include?(@format)
12
- raise Paperclip::Error, "Valid format must be specified"
13
- end
14
- unless @size = options[:size]
15
- @density = (options[:density] || 150).to_f
16
- end
17
- end
10
+ def initialize(file, options = {}, attachment = nil)
11
+ super(file, options, attachment)
12
+ @format = (options[:format] || :jpg).to_sym
13
+ unless [:jpg, :png].include?(@format)
14
+ raise Paperclip::Error, "Valid format must be specified"
15
+ end
16
+ unless @size = options[:size]
17
+ @density = (options[:density] || 150).to_f
18
+ end
19
+ end
18
20
 
19
- # Extract the page
20
- def make
21
- destination_path = tmp_dir.to_s
22
- options = {:output => destination_path, :pages => [1], :format => [@format]}
23
- if @size
24
- options[:size] = @size
25
- elsif @density
26
- options[:density] = @density
27
- end
28
- Docsplit.extract_images(file_path.to_s, options)
29
- begin
30
- rescue
31
- raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
32
- end
33
- return File.open(File.join(destination_path, basename + "_1.#{@format}"))
34
- end
21
+ # Extract the page
22
+ def make
23
+ destination_path = tmp_dir.to_s
24
+ options = {:output => destination_path, :pages => [1], :format => [@format]}
25
+ if @size
26
+ options[:size] = @size
27
+ elsif @density
28
+ options[:density] = @density
29
+ end
30
+ Docsplit.extract_images(file_path.to_s, options)
31
+ begin
32
+ rescue
33
+ raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
34
+ end
35
+ return File.open(File.join(destination_path, basename + "_1.#{@format}"))
36
+ end
35
37
 
36
- end
38
+ end
37
39
 
38
40
 
41
+ end
42
+ end
39
43
  end
@@ -1,5 +1,5 @@
1
1
  module Paperclip
2
2
  module Document
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,73 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paperclip-document
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brice Texier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-07 00:00:00.000000000 Z
11
+ date: 2014-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: paperclip
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3.1'
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: '5'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - ~>
27
+ - - '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '3.1'
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: '5'
27
33
  - !ruby/object:Gem::Dependency
28
- name: docsplit
34
+ name: burisu-docsplit
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - ~>
37
+ - - '>='
32
38
  - !ruby/object:Gem::Version
33
- version: 0.7.2
39
+ version: 0.7.6
34
40
  type: :runtime
35
41
  prerelease: false
36
42
  version_requirements: !ruby/object:Gem::Requirement
37
43
  requirements:
38
- - - ~>
44
+ - - '>='
39
45
  - !ruby/object:Gem::Version
40
- version: 0.7.2
46
+ version: 0.7.6
41
47
  - !ruby/object:Gem::Dependency
42
- name: ruby-filemagic
48
+ name: bundler
43
49
  requirement: !ruby/object:Gem::Requirement
44
50
  requirements:
45
51
  - - ~>
46
52
  - !ruby/object:Gem::Version
47
- version: 0.4.2
48
- type: :runtime
53
+ version: '1.3'
54
+ type: :development
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
51
57
  requirements:
52
58
  - - ~>
53
59
  - !ruby/object:Gem::Version
54
- version: 0.4.2
60
+ version: '1.3'
55
61
  - !ruby/object:Gem::Dependency
56
- name: bundler
62
+ name: rake
57
63
  requirement: !ruby/object:Gem::Requirement
58
64
  requirements:
59
- - - ~>
65
+ - - '>='
60
66
  - !ruby/object:Gem::Version
61
- version: '1.3'
67
+ version: '0'
62
68
  type: :development
63
69
  prerelease: false
64
70
  version_requirements: !ruby/object:Gem::Requirement
65
71
  requirements:
66
- - - ~>
72
+ - - '>='
67
73
  - !ruby/object:Gem::Version
68
- version: '1.3'
74
+ version: '0'
69
75
  - !ruby/object:Gem::Dependency
70
- name: rake
76
+ name: rails
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '3.2'
82
+ - - <
83
+ - !ruby/object:Gem::Version
84
+ version: '5'
85
+ type: :development
86
+ prerelease: false
87
+ version_requirements: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '3.2'
92
+ - - <
93
+ - !ruby/object:Gem::Version
94
+ version: '5'
95
+ - !ruby/object:Gem::Dependency
96
+ name: coveralls
71
97
  requirement: !ruby/object:Gem::Requirement
72
98
  requirements:
73
99
  - - '>='
@@ -118,6 +144,8 @@ files:
118
144
  - LICENSE.txt
119
145
  - README.md
120
146
  - lib/paperclip/document.rb
147
+ - lib/paperclip/document/attachment_extension.rb
148
+ - lib/paperclip/document/processor.rb
121
149
  - lib/paperclip/document/processors.rb
122
150
  - lib/paperclip/document/processors/counter.rb
123
151
  - lib/paperclip/document/processors/freezer.rb