paperclip-document 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *~
2
+ *.gem
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in paperclip-document.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Brice Texier
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Paperclip::Document
2
+
3
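+ Paperclip processors for office and PDF documents, built on Docsplit: extract the text (`reader`), count the pages (`counter`), convert to PDF (`freezer`) and render the first page as a thumbnail (`sketcher`).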
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'paperclip-document'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install paperclip-document
18
+
19
+ ## Usage
20
+
21
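+ Declare the processors in the styles of your attachment, for example `:styles => {:archive => {:format => :pdf, :processors => [:reader, :counter, :freezer]}, :thumbnail => {:format => :jpg, :processors => [:sketcher]}}`. The extracted text and the pages count are written to the `<attachment>_content_text` and `<attachment>_pages_count` columns of the model.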
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# Running plain `rake` runs the test suite.
task :default => :test
@@ -0,0 +1,44 @@
1
+ require "paperclip/document/version"
2
+ require "docsplit"
3
+ require "pathname"
4
+
5
module Paperclip
  # Base class of the document processors (Sketcher, Reader, Freezer and
  # Counter). It exposes the model instance owning the attachment, a
  # per-run temporary directory and helpers to name the processed file.
  class DocumentProcessor < Processor

    attr_reader :instance, :tmp_dir

    # file::       the file to process (must respond to +path+)
    # options::    the style options, forwarded to the superclass
    # attachment:: the attachment being processed; it is dereferenced here,
    #              so it is effectively required despite the +nil+ default
    def initialize(file, options = {}, attachment = nil)
      super(file, options, attachment)
      @instance = @attachment.instance
      # Timestamp + random suffix keep concurrent runs on the same file
      # from sharing a directory. The directory is only named here, it is
      # not created by this class.
      unique_part = Time.now.to_i.to_s(36) + "-" + rand(1_000_000).to_s(36)
      @tmp_dir = Pathname.new(Dir.tmpdir).join(
        "paperclip-document",
        instance.class.name,
        attachment.name.to_s,
        basename,
        unique_part
      )
    end

    # Path of the file being processed, as a Pathname.
    def file_path
      Pathname.new(@file.path)
    end

    # Name of the processed file without its directory and without its
    # last extension ("my.file.pdf" => "my.file").
    #
    # Only the final extension is removed. The previous implementation
    # stripped every ".xxx" segment ("my.file.pdf" => "my"), which no longer
    # matched the name of the files produced from the original document.
    # NOTE(review): assumes Docsplit names its output after the input name
    # minus its last extension only - confirm against the Docsplit version
    # in use.
    def basename
      path = file_path
      path.basename(path.extname).to_s
    end

  end

  class Attachment

    # Returns the content_text of the file as originally extracted; it lives
    # in the <attachment>_content_text attribute of the model.
    def content_text
      instance_read(:content_text)
    end

    # Returns the pages_count of the file as originally computed; it lives
    # in the <attachment>_pages_count attribute of the model.
    def pages_count
      instance_read(:pages_count)
    end

  end

end
43
+
44
+ require "paperclip/document/processors"
@@ -0,0 +1,4 @@
1
+ require "paperclip/document/processors/sketcher"
2
+ require "paperclip/document/processors/reader"
3
+ require "paperclip/document/processors/freezer"
4
+ require "paperclip/document/processors/counter"
@@ -0,0 +1,45 @@
1
module Paperclip

  # This processor counts the pages of the document and stores the result
  # in a column of the model. The file itself is passed through untouched.
  class Counter < DocumentProcessor

    attr_accessor :pages_count_column

    # Resolves the column receiving the pages count: the
    # :pages_count_column option when given, otherwise the default column
    # if the model has it. Raises Paperclip::Error when none is found.
    def initialize(file, options = {}, attachment = nil)
      super(file, options, attachment)
      if @options[:pages_count_column].nil? && pages_count_column?
        @options[:pages_count_column] = default_pages_count_column
      end
      @pages_count_column = @options[:pages_count_column]

      raise Paperclip::Error, "No pages count column given" unless @pages_count_column
    end

    # Computes the pages count of the whole document, writes it on the
    # model and returns the unmodified file.
    def make
      length = Docsplit.extract_length(file_path.to_s)

      instance[pages_count_column] = length
      # NOTE(review): runs the model's :save callbacks around a block
      # returning false; presumably a way to fire them without a real
      # save - confirm the intent.
      instance.run_callbacks(:save) { false }

      file
    end

    # Returns the default pages count column of the model when it exists
    # (truthy), nil otherwise.
    def pages_count_column?
      wanted = default_pages_count_column
      @attachment.instance.class.columns.detect { |column| column.name.to_s == wanted }
    end

    # Returns the name of the default pages count column
    def default_pages_count_column
      "#{@attachment.name}_pages_count"
    end

  end

end
@@ -0,0 +1,38 @@
1
+ require 'filemagic'
2
+
3
module Paperclip

  # This processor converts the document to PDF. A file already detected
  # as PDF is returned as is.
  class Freezer < DocumentProcessor

    # The :format option is mandatory and must be pdf. It is accepted as a
    # symbol or as a string, like the format of the Sketcher processor.
    # Raises Paperclip::Error for any other value.
    def initialize(file, options = {}, attachment = nil)
      super
      requested = options[:format]
      @format = requested.respond_to?(:to_sym) ? requested.to_sym : requested
      unless @format == :pdf
        raise Paperclip::Error, "Valid format (pdf) must be specified"
      end
    end

    # Convert the document to pdf and return an open File on the result.
    def make
      # Nothing to convert: hand back a new handle on the source file.
      return File.open(file_path.to_s) if pdf_format?

      destination_path = tmp_dir.to_s
      Docsplit.extract_pdf(file_path.to_s, :output => destination_path)
      File.open(File.join(destination_path, basename + ".#{@format}"))
    end

    # Tells whether the processed file is detected as a PDF by FileMagic.
    # Returns a truthy value (match position) or nil.
    def pdf_format?
      file_magic = FileMagic.new
      begin
        type = file_magic.file(file_path.to_s)
      ensure
        # Always release the handle, even when the detection raises.
        file_magic.close
      end
      type =~ /pdf/i
    end

  end

end
@@ -0,0 +1,53 @@
1
module Paperclip

  # This processor extracts the text of the document (through Docsplit)
  # and stores it in a column of the model. The file itself is passed
  # through untouched.
  class Reader < DocumentProcessor

    attr_accessor :clean, :text_column, :language

    # Options read here:
    # :text_column:: column receiving the text; defaults to
    #                <attachment>_content_text when the model has it
    # :language::    value, or Proc called with the model instance, passed
    #                to Docsplit as :language
    # :clean::       passed to Docsplit as :clean, true unless given
    #
    # Raises Paperclip::Error when no text column can be determined.
    def initialize(file, options = {}, attachment = nil)
      super(file, options, attachment)
      if @options[:text_column].nil? && text_column?
        @options[:text_column] = default_text_column
      end
      @language = @options[:language]
      @text_column = @options[:text_column]
      unless @text_column
        raise Paperclip::Error, "No content text column given"
      end
      @clean = !!options.fetch(:clean, true)
    end

    # Extract the text of the whole document, write it on the model and
    # return the unmodified file.
    def make
      destination_path = tmp_dir.to_s
      extraction_options = {:output => destination_path, :clean => @clean}
      extraction_options[:language] = (language.is_a?(Proc) ? language.call(instance) : language)
      Docsplit.extract_text(file_path.to_s, extraction_options)

      destination_file = File.join(destination_path, basename + ".txt")
      # File.read closes the file itself, so no handle is left open when
      # the assignment or the callbacks raise.
      instance[text_column] = File.read(destination_file)
      # NOTE(review): runs the model's :save callbacks around a block
      # returning false; presumably a way to fire them without a real
      # save - confirm the intent.
      instance.run_callbacks(:save) { false }

      file
    end

    # Returns the default text column of the model when it exists (truthy),
    # nil otherwise.
    def text_column?
      expected_column = default_text_column
      instance.class.columns.detect do |column|
        column.name.to_s == expected_column
      end
    end

    # Returns the name of the default text column
    def default_text_column
      @attachment.name.to_s + "_content_text"
    end

  end

end
@@ -0,0 +1,39 @@
1
module Paperclip

  # This processor extracts the first page of the document as an image
  # (thumbnail).
  class Sketcher < DocumentProcessor

    # :size replaces the duplicated :format of the original declaration;
    # @size was the only setting without an accessor.
    attr_accessor :format, :density, :size

    # Options read here:
    # :format::  :jpg (default) or :png, as a symbol or a string
    # :size::    passed to Docsplit as :size; takes precedence over density
    # :density:: passed to Docsplit as :density (default 150), only used
    #            when no size is given
    #
    # Raises Paperclip::Error when the format is not supported.
    def initialize(file, options = {}, attachment = nil)
      super(file, options, attachment)
      @format = (options[:format] || :jpg).to_sym
      unless [:jpg, :png].include?(@format)
        raise Paperclip::Error, "Valid format must be specified"
      end
      @size = options[:size]
      @density = (options[:density] || 150).to_f unless @size
    end

    # Extract the first page and return an open File on the image.
    # Raises Paperclip::Error when the extraction fails.
    def make
      destination_path = tmp_dir.to_s
      options = {:output => destination_path, :pages => [1], :format => [@format]}
      if @size
        options[:size] = @size
      elsif @density
        options[:density] = @density
      end
      begin
        # The extraction has to run inside the begin block: it used to sit
        # before an empty begin/rescue, so its failures were never turned
        # into a Paperclip::Error.
        Docsplit.extract_images(file_path.to_s, options)
      rescue StandardError
        raise Paperclip::Error, "There was an error extracting the first thumbnail from #{basename}"
      end
      File.open(File.join(destination_path, basename + "_1.#{@format}"))
    end

  end

end
@@ -0,0 +1,5 @@
1
module Paperclip
  module Document
    # Version of the paperclip-document gem, read by the gemspec.
    VERSION = '0.0.0'
  end
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'paperclip/document/version'

Gem::Specification.new do |gem|
  gem.name          = "paperclip-document"
  gem.version       = Paperclip::Document::VERSION
  gem.authors       = ["Brice Texier"]
  gem.email         = ["burisu@oneiros.fr"]
  gem.summary       = "Processors for paperclip"
  gem.homepage      = "http://github.com/burisu/paperclip-document"
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by their directory.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  [
    ["paperclip", "~> 3.1"],
    ["docsplit", "~> 0.7.2"],
    ["ruby-filemagic", "~> 0.4.2"]
  ].each do |dependency|
    gem.add_dependency(*dependency)
  end

  # Development dependencies
  [
    ["bundler", "~> 1.3"],
    ["rake"],
    ['activerecord', "~> 3.2"],
    ['sqlite3']
  ].each do |dependency|
    gem.add_development_dependency(*dependency)
  end
end
Binary file
Binary file
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'paperclip'
2
+ require 'paperclip/railtie'
3
+ require 'active_record'
4
+ require 'pathname'
5
+ require 'paperclip/document'
6
+ require 'test/unit'
7
+
8
# The tests run against an in-memory SQLite database
db_config = {
  "adapter"  => "sqlite3",
  "database" => ":memory:"
}
ActiveRecord::Base.establish_connection(db_config)

# Logger built on a nil device
ActiveRecord::Base.logger = Logger.new(nil)

# Create the tables used by the test models
schema_file = File.join(File.dirname(__FILE__), 'schema.rb')
load(schema_file)

Paperclip::Railtie.insert
18
+
19
# Test model with one attachment going through the four processors of the gem.
class Document < ActiveRecord::Base
  # Text extraction, pages count, then conversion to PDF
  archive_style   = {:clean => true, :format => :pdf, :processors => [:reader, :counter, :freezer]}
  # Image of the first page
  thumbnail_style = {:processors => [:sketcher], :format => :jpg}

  has_attached_file :original,
                    :storage => :filesystem,
                    :path    => "./tmp/documents/:id/:style.:extension",
                    :url     => "/tmp/:id.:extension",
                    :styles  => {:archive => archive_style, :thumbnail => thumbnail_style}
end
29
+
30
+
31
+
32
# Base class of the test cases of the gem.
class Paperclip::Document::TestCase < Test::Unit::TestCase

  # Pathname of the "fixtures" directory located next to this helper
  def fixtures
    Pathname.new(File.dirname(__FILE__)).join("fixtures")
  end
end
data/test/schema.rb ADDED
@@ -0,0 +1,11 @@
1
# Schema of the test database: a single "documents" table holding the
# attachment named "original".
ActiveRecord::Schema.define :version => "001" do
  create_table "documents", :force => true do |t|
    t.string   :name
    # Columns of the :original attachment
    t.string   :original_file_name
    t.string   :original_content_type
    # A timestamp is written here, so the column is a datetime (it was
    # declared as an integer).
    # NOTE(review): Paperclip's own schema helper is believed to declare
    # <attachment>_updated_at as datetime - confirm.
    t.datetime :original_updated_at
    t.integer  :original_file_size
    # Default columns looked up by the Reader and Counter processors
    t.text     :original_content_text
    t.integer  :original_pages_count
  end
end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
# Runs the processors against one sample document per supported format.
class TestProcessors < Paperclip::Document::TestCase

  def test_odt
    assert_document_processed("example.odt", "My first document")
  end

  def test_pdf
    assert_document_processed("example.pdf", "My second document")
  end

  def test_docx
    assert_document_processed("example.docx", "My third document")
  end

  private

  # Attaches the given fixture to a new Document, then checks the pages
  # count and the text stored by the processors.
  #
  # fixture:: file name inside the fixtures directory
  # name::    value of the +name+ attribute of the created record
  def assert_document_processed(fixture, name)
    # Block form: the fixture is closed even when create! raises.
    document = File.open(fixtures.join(fixture)) do |f|
      Document.create!(:name => name, :original => f)
    end

    document.reload
    assert_equal 1, document.original.pages_count

    assert_not_nil document.original.content_text
    # Same pattern as String#match("This is an example.") used before.
    assert_match(/This is an example./, document.original.content_text)
  end

end
metadata ADDED
@@ -0,0 +1,184 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: paperclip-document
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Brice Texier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: paperclip
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '3.1'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '3.1'
30
+ - !ruby/object:Gem::Dependency
31
+ name: docsplit
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.7.2
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.7.2
46
+ - !ruby/object:Gem::Dependency
47
+ name: ruby-filemagic
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.4.2
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.4.2
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '1.3'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '1.3'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rake
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: activerecord
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: '3.2'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '3.2'
110
+ - !ruby/object:Gem::Dependency
111
+ name: sqlite3
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description:
127
+ email:
128
+ - burisu@oneiros.fr
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - .gitignore
134
+ - Gemfile
135
+ - LICENSE.txt
136
+ - README.md
137
+ - Rakefile
138
+ - lib/paperclip/document.rb
139
+ - lib/paperclip/document/processors.rb
140
+ - lib/paperclip/document/processors/counter.rb
141
+ - lib/paperclip/document/processors/freezer.rb
142
+ - lib/paperclip/document/processors/reader.rb
143
+ - lib/paperclip/document/processors/sketcher.rb
144
+ - lib/paperclip/document/version.rb
145
+ - paperclip-document.gemspec
146
+ - test/fixtures/example.docx
147
+ - test/fixtures/example.odt
148
+ - test/fixtures/example.pdf
149
+ - test/helper.rb
150
+ - test/schema.rb
151
+ - test/test_processors.rb
152
+ homepage: http://github.com/burisu/paperclip-document
153
+ licenses:
154
+ - MIT
155
+ post_install_message:
156
+ rdoc_options: []
157
+ require_paths:
158
+ - lib
159
+ required_ruby_version: !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ! '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ required_rubygems_version: !ruby/object:Gem::Requirement
166
+ none: false
167
+ requirements:
168
+ - - ! '>='
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ requirements: []
172
+ rubyforge_project:
173
+ rubygems_version: 1.8.23
174
+ signing_key:
175
+ specification_version: 3
176
+ summary: Processors for paperclip
177
+ test_files:
178
+ - test/fixtures/example.docx
179
+ - test/fixtures/example.odt
180
+ - test/fixtures/example.pdf
181
+ - test/helper.rb
182
+ - test/schema.rb
183
+ - test/test_processors.rb
184
+ has_rdoc: