docsplit_images 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +68 -26
- data/VERSION +1 -1
- data/docsplit_images.gemspec +3 -3
- data/lib/docsplit_images/conversion.rb +26 -3
- data/lib/docsplit_images.rb +1 -0
- metadata +4 -4
data/README.markdown
CHANGED
@@ -10,62 +10,80 @@ Docsplit images is used to convert a document file (pdf, xls, xlsx, ppt, pptx, d
|
|
10
10
|
|
11
11
|
#### 1. Install GraphicsMagick. Its ‘gm’ command is used to generate images. Either compile it from source, or use a package manager:
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
```bash
|
14
|
+
[aptitude | port | brew] install graphicsmagick
|
15
|
+
```
|
16
|
+
|
15
17
|
#### 2. Install Poppler. On Linux, use aptitude, apt-get or yum:
|
16
18
|
|
17
|
-
|
19
|
+
```bash
|
20
|
+
aptitude install poppler-utils poppler-data
|
21
|
+
```
|
18
22
|
|
19
23
|
On Mac, you can install from source or use MacPorts:
|
20
24
|
|
21
|
-
|
25
|
+
```bash
|
26
|
+
sudo port install poppler | brew install poppler
|
27
|
+
```
|
22
28
|
|
23
29
|
#### 3. (Optional) Install Ghostscript:
|
24
30
|
|
25
|
-
|
31
|
+
```bash
|
32
|
+
[aptitude | port | brew] install ghostscript
|
33
|
+
```
|
26
34
|
|
27
35
|
Ghostscript is required to convert PDF and Postscript files.
|
28
36
|
|
29
37
|
#### 4. (Optional) Install Tesseract:
|
30
38
|
|
31
|
-
|
39
|
+
```bash
|
40
|
+
[aptitude | port | brew] install [tesseract | tesseract-ocr]
|
41
|
+
```
|
32
42
|
|
33
43
|
Without Tesseract installed, you'll still be able to extract text from documents, but you won't be able to automatically OCR them.
|
34
44
|
|
35
45
|
#### 5. (Optional) Install pdftk. On Linux, use aptitude, apt-get or yum:
|
36
46
|
|
37
|
-
|
47
|
+
```bash
|
48
|
+
aptitude install pdftk
|
49
|
+
```
|
38
50
|
|
39
51
|
On the Mac, you can download a [recent installer for the binary](http://www.pdflabs.com/docs/install-pdftk/). Without pdftk installed, you can use Docsplit, but won't be able to split apart a multi-page PDF into single-page PDFs.
|
40
52
|
|
41
53
|
#### 6. (Optional) Install OpenOffice. On Linux, use aptitude, apt-get or yum:
|
42
54
|
|
43
|
-
|
44
|
-
|
45
|
-
|
55
|
+
```bash
|
56
|
+
aptitude install openoffice.org openoffice.org-java-common
|
57
|
+
```
|
58
|
+
|
59
|
+
On Mac, download and install [http://www.openoffice.org/download/index.html](http://www.openoffice.org/download/index.html).
|
46
60
|
|
47
61
|
### Install Gem
|
48
62
|
|
49
|
-
gem 'docsplit_images', :git => 'git@github.com:jameshuynh/docsplit_images.git', tag: "v0.
|
63
|
+
gem 'docsplit_images', :git => 'git@github.com:jameshuynh/docsplit_images.git', tag: "v0.2.0"
|
50
64
|
|
51
65
|
## Setting Up
|
52
66
|
|
53
67
|
From the terminal, run the following commands to install:
|
54
68
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
69
|
+
```bash
|
70
|
+
bundle
|
71
|
+
rails g docsplit_images <table_name> <attachment_field_name>
|
72
|
+
# e.g. rails generate docsplit_images asset document
|
73
|
+
rake db:migrate
|
74
|
+
```
|
59
75
|
|
60
76
|
In your model:
|
61
77
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
78
|
+
```ruby
|
79
|
+
class Asset < ActiveRecord::Base
|
80
|
+
...
|
81
|
+
attr_accessible :mydocument
|
82
|
+
has_attached_file :mydocument
|
83
|
+
docsplit_images_conversion_for :mydocument, {size: "800x"}
|
84
|
+
...
|
85
|
+
end
|
86
|
+
```
|
69
87
|
|
70
88
|
## Processing Images
|
71
89
|
|
@@ -75,15 +93,39 @@ docsplit_images requires delayed_job to be turned on the process.
|
|
75
93
|
|
76
94
|
While the document is being processed by [delayed_job](https://github.com/collectiveidea/delayed_job), you can check whether processing is still in progress by accessing the attribute ``is_processing_image``
|
77
95
|
|
78
|
-
|
96
|
+
```ruby
|
97
|
+
asset.is_processing_image?
|
98
|
+
```
|
99
|
+
|
100
|
+
## Total number of pages
|
101
|
+
|
102
|
+
* If your document file is not PDF, this will be non-zero after the internal conversion to PDF has been completed.
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
asset.number_of_images_entry
|
106
|
+
```
|
107
|
+
|
108
|
+
## Checking the number of images which have been completed
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
asset.number_of_completed_images
|
112
|
+
```
|
113
|
+
|
114
|
+
## Checking the overall conversion progress
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
asset.images_conversion_progress
|
118
|
+
# => 0.45 (which is 45%)
|
119
|
+
```
|
79
120
|
|
80
121
|
## Accessing list of images using ``document_images_list``
|
81
122
|
|
82
123
|
``document_images_list`` will return a list of URLs of the images converted from the document
|
83
124
|
|
84
|
-
|
85
|
-
|
86
|
-
|
125
|
+
```ruby
|
126
|
+
asset.document_images_list
|
127
|
+
# => ["/system/myfile_revisions/files/000/000/019/images/SBA_Admin_workflow_1.png", "/system/myfile_revisions/files/000/000/019/images/SBA_Admin_workflow_2.png", ...]
|
128
|
+
```
|
87
129
|
|
88
130
|
Contributing to docsplit_images
|
89
131
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/docsplit_images.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "docsplit_images"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["jameshuynh"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-06-01"
|
13
13
|
s.description = "Split Images for your document in one line of code"
|
14
14
|
s.email = "james@rubify.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -37,7 +37,7 @@ Gem::Specification.new do |s|
|
|
37
37
|
s.homepage = "http://github.com/jameshuynh/docsplit_images"
|
38
38
|
s.licenses = ["MIT"]
|
39
39
|
s.require_paths = ["lib"]
|
40
|
-
s.rubygems_version = "1.8.
|
40
|
+
s.rubygems_version = "1.8.25"
|
41
41
|
s.summary = "Split Images for your document"
|
42
42
|
|
43
43
|
if s.respond_to? :specification_version then
|
@@ -24,11 +24,34 @@ module DocsplitImages
|
|
24
24
|
parent_dir = File.dirname(File.dirname(self.send(self.class.docsplit_attachment_name).path))
|
25
25
|
FileUtils.rm_rf("#{parent_dir}/images")
|
26
26
|
FileUtils.mkdir("#{parent_dir}/images")
|
27
|
-
|
28
|
-
|
27
|
+
doc_path = self.send(self.class.docsplit_attachment_name).path
|
28
|
+
ext = File.extname(doc_path)
|
29
|
+
temp_pdf_path = if ext.downcase == '.pdf'
|
30
|
+
doc_path
|
31
|
+
else
|
32
|
+
tempdir = File.join(Dir.tmpdir, 'docsplit')
|
33
|
+
Docsplit.extract_pdf([doc_path], {:output => tempdir})
|
34
|
+
File.join(tempdir, File.basename(doc, ext) + '.pdf')
|
35
|
+
end
|
36
|
+
self.number_of_images_entry = Docsplit.extract_length(temp_pdf_path)
|
37
|
+
self.save(validate: false)
|
38
|
+
|
39
|
+
# Going to convert to images
|
40
|
+
Docsplit::ImageExtractor.new.extract(temp_pdf_path, self.class.docsplit_attachment_options.merge({:output => "#{parent_dir}/images"}))
|
29
41
|
@file_has_changed = false
|
30
42
|
self.is_processing_image = false
|
31
|
-
self.save(:validate => false)
|
43
|
+
self.save(:validate => false)
|
44
|
+
end
|
45
|
+
|
46
|
+
def number_of_completed_images
|
47
|
+
parent_dir = File.dirname(File.dirname(self.send(self.class.docsplit_attachment_name).path))
|
48
|
+
return Dir.entries("#{parent_dir}/images").size - 2
|
49
|
+
end
|
50
|
+
|
51
|
+
# return the conversion progress as a fraction rounded to two decimals (e.g. 0.45 means 45%)
|
52
|
+
def images_conversion_progress
|
53
|
+
return ("%.2f" % (number_of_completed_images * 1.0 / self.number_of_images_entry)).to_f if self.is_pdf_convertible?
|
54
|
+
return 1
|
32
55
|
end
|
33
56
|
|
34
57
|
## paperclip overriding
|
data/lib/docsplit_images.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docsplit_images
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: paperclip
|
@@ -162,7 +162,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
162
|
version: '0'
|
163
163
|
segments:
|
164
164
|
- 0
|
165
|
-
hash:
|
165
|
+
hash: -352764726966913135
|
166
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
167
|
none: false
|
168
168
|
requirements:
|
@@ -171,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
171
|
version: '0'
|
172
172
|
requirements: []
|
173
173
|
rubyforge_project:
|
174
|
-
rubygems_version: 1.8.
|
174
|
+
rubygems_version: 1.8.25
|
175
175
|
signing_key:
|
176
176
|
specification_version: 3
|
177
177
|
summary: Split Images for your document
|