docsplit_images 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +68 -26
- data/VERSION +1 -1
- data/docsplit_images.gemspec +3 -3
- data/lib/docsplit_images/conversion.rb +26 -3
- data/lib/docsplit_images.rb +1 -0
- metadata +4 -4
data/README.markdown
CHANGED
@@ -10,62 +10,80 @@ Docsplit images is used to convert a document file (pdf, xls, xlsx, ppt, pptx, d
|
|
10
10
|
|
11
11
|
#### 1. Install GraphicsMagick. Its ‘gm’ command is used to generate images. Either compile it from source, or use a package manager:
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
```bash
|
14
|
+
[aptitude | port | brew] install graphicsmagick
|
15
|
+
```
|
16
|
+
|
15
17
|
#### 2. Install Poppler. On Linux, use aptitude, apt-get or yum:
|
16
18
|
|
17
|
-
|
19
|
+
```bash
|
20
|
+
aptitude install poppler-utils poppler-data
|
21
|
+
```
|
18
22
|
|
19
23
|
On Mac, you can install from source or use MacPorts:
|
20
24
|
|
21
|
-
|
25
|
+
```bash
|
26
|
+
sudo port install poppler | brew install poppler
|
27
|
+
```
|
22
28
|
|
23
29
|
#### 3. (Optional) Install Ghostscript:
|
24
30
|
|
25
|
-
|
31
|
+
```bash
|
32
|
+
[aptitude | port | brew] install ghostscript
|
33
|
+
```
|
26
34
|
|
27
35
|
Ghostscript is required to convert PDF and Postscript files.
|
28
36
|
|
29
37
|
#### 4. (Optional) Install Tesseract:
|
30
38
|
|
31
|
-
|
39
|
+
```bash
|
40
|
+
[aptitude | port | brew] install [tesseract | tesseract-ocr]
|
41
|
+
```
|
32
42
|
|
33
43
|
Without Tesseract installed, you'll still be able to extract text from documents, but you won't be able to automatically OCR them.
|
34
44
|
|
35
45
|
#### 5. (Optional) Install pdftk. On Linux, use aptitude, apt-get or yum:
|
36
46
|
|
37
|
-
|
47
|
+
```bash
|
48
|
+
aptitude install pdftk
|
49
|
+
```
|
38
50
|
|
39
51
|
On the Mac, you can download a [recent installer for the binary](http://www.pdflabs.com/docs/install-pdftk/). Without pdftk installed, you can use Docsplit, but won't be able to split apart a multi-page PDF into single-page PDFs.
|
40
52
|
|
41
53
|
#### 6. (Optional) Install OpenOffice. On Linux, use aptitude, apt-get or yum:
|
42
54
|
|
43
|
-
|
44
|
-
|
45
|
-
|
55
|
+
```bash
|
56
|
+
aptitude install openoffice.org openoffice.org-java-common
|
57
|
+
```
|
58
|
+
|
59
|
+
On Mac, download and install [http://www.openoffice.org/download/index.html](http://www.openoffice.org/download/index.html).
|
46
60
|
|
47
61
|
### Install Gem
|
48
62
|
|
49
|
-
gem 'docsplit_images', :git => 'git@github.com:jameshuynh/docsplit_images.git', tag: "v0.
|
63
|
+
gem 'docsplit_images', :git => 'git@github.com:jameshuynh/docsplit_images.git', tag: "v0.2.0"
|
50
64
|
|
51
65
|
## Setting Up
|
52
66
|
|
53
67
|
From terminal, type the command to install
|
54
68
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
69
|
+
```bash
|
70
|
+
bundle
|
71
|
+
rails g docsplit_images <table_name> <attachment_field_name>
|
72
|
+
# e.g. rails generate docsplit_images asset document
|
73
|
+
rake db:migrate
|
74
|
+
```
|
59
75
|
|
60
76
|
In your model:
|
61
77
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
78
|
+
```ruby
|
79
|
+
class Asset < ActiveRecord::Base
|
80
|
+
...
|
81
|
+
attr_accessible :mydocument
|
82
|
+
has_attached_file :mydocument
|
83
|
+
docsplit_images_conversion_for :mydocument, {size: "800x"}
|
84
|
+
...
|
85
|
+
end
|
86
|
+
```
|
69
87
|
|
70
88
|
## Processing Images
|
71
89
|
|
@@ -75,15 +93,39 @@ docsplit_images requires delayed_job to be turned on the process.
|
|
75
93
|
|
76
94
|
While the document is being processed by [delayed_job](https://github.com/collectiveidea/delayed_job), you can check whether processing is still in progress through the ``is_processing_image`` attribute
|
77
95
|
|
78
|
-
|
96
|
+
```ruby
|
97
|
+
asset.is_processing_image?
|
98
|
+
```
|
99
|
+
|
100
|
+
## Total number of pages
|
101
|
+
|
102
|
+
* If your document file is not a PDF, this will be non-zero after the internal conversion to PDF has been completed.
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
asset.number_of_images_entry
|
106
|
+
```
|
107
|
+
|
108
|
+
## Checking the number of images which have been completed
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
asset.number_of_completed_images
|
112
|
+
```
|
113
|
+
|
114
|
+
## Checking the overall conversion progress
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
asset.images_conversion_progress
|
118
|
+
# => 0.45 (which is 45%)
|
119
|
+
```
|
79
120
|
|
80
121
|
## Accessing list of images using ``document_images_list``
|
81
122
|
|
82
123
|
``document_images_list`` returns a list of URLs of the images converted from the document
|
83
124
|
|
84
|
-
|
85
|
-
|
86
|
-
|
125
|
+
```ruby
|
126
|
+
asset.document_images_list
|
127
|
+
# => ["/system/myfile_revisions/files/000/000/019/images/SBA_Admin_workflow_1.png", "/system/myfile_revisions/files/000/000/019/images/SBA_Admin_workflow_2.png", ...]
|
128
|
+
```
|
87
129
|
|
88
130
|
Contributing to docsplit_images
|
89
131
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.9
|
1
|
+
0.2.0
|
data/docsplit_images.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "docsplit_images"
|
8
|
-
s.version = "0.1.9"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["jameshuynh"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-06-01"
|
13
13
|
s.description = "Split Images for your document in one line of code"
|
14
14
|
s.email = "james@rubify.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -37,7 +37,7 @@ Gem::Specification.new do |s|
|
|
37
37
|
s.homepage = "http://github.com/jameshuynh/docsplit_images"
|
38
38
|
s.licenses = ["MIT"]
|
39
39
|
s.require_paths = ["lib"]
|
40
|
-
s.rubygems_version = "1.8.
|
40
|
+
s.rubygems_version = "1.8.25"
|
41
41
|
s.summary = "Split Images for your document"
|
42
42
|
|
43
43
|
if s.respond_to? :specification_version then
|
@@ -24,11 +24,34 @@ module DocsplitImages
|
|
24
24
|
parent_dir = File.dirname(File.dirname(self.send(self.class.docsplit_attachment_name).path))
|
25
25
|
FileUtils.rm_rf("#{parent_dir}/images")
|
26
26
|
FileUtils.mkdir("#{parent_dir}/images")
|
27
|
-
|
28
|
-
|
27
|
+
doc_path = self.send(self.class.docsplit_attachment_name).path
|
28
|
+
ext = File.extname(doc_path)
|
29
|
+
temp_pdf_path = if ext.downcase == '.pdf'
|
30
|
+
doc_path
|
31
|
+
else
|
32
|
+
tempdir = File.join(Dir.tmpdir, 'docsplit')
|
33
|
+
Docsplit.extract_pdf([doc_path], {:output => tempdir})
|
34
|
+
File.join(tempdir, File.basename(doc, ext) + '.pdf')
|
35
|
+
end
|
36
|
+
self.number_of_images_entry = Docsplit.extract_length(temp_pdf_path)
|
37
|
+
self.save(validate: false)
|
38
|
+
|
39
|
+
# Going to convert to images
|
40
|
+
Docsplit::ImageExtractor.new.extract(temp_pdf_path, self.class.docsplit_attachment_options.merge({:output => "#{parent_dir}/images"}))
|
29
41
|
@file_has_changed = false
|
30
42
|
self.is_processing_image = false
|
31
|
-
self.save(:validate => false)
|
43
|
+
self.save(:validate => false)
|
44
|
+
end
|
45
|
+
|
46
|
+
def number_of_completed_images
|
47
|
+
parent_dir = File.dirname(File.dirname(self.send(self.class.docsplit_attachment_name).path))
|
48
|
+
return Dir.entries("#{parent_dir}/images").size - 2
|
49
|
+
end
|
50
|
+
|
51
|
+
# Returns the conversion progress as a fraction rounded to two decimals (e.g. 0.45 means 45%)
|
52
|
+
def images_conversion_progress
|
53
|
+
return ("%.2f" % (number_of_completed_images * 1.0 / self.number_of_images_entry)).to_f if self.is_pdf_convertible?
|
54
|
+
return 1
|
32
55
|
end
|
33
56
|
|
34
57
|
## paperclip overriding
|
data/lib/docsplit_images.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docsplit_images
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: paperclip
|
@@ -162,7 +162,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
162
|
version: '0'
|
163
163
|
segments:
|
164
164
|
- 0
|
165
|
-
hash:
|
165
|
+
hash: -352764726966913135
|
166
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
167
|
none: false
|
168
168
|
requirements:
|
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
171
|
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
|
-
rubygems_version: 1.8.
|
174
|
+
rubygems_version: 1.8.25
|
175
175
|
signing_key:
|
176
176
|
specification_version: 3
|
177
177
|
summary: Split Images for your document
|