grim 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.textile +25 -8
- data/grim.gemspec +2 -1
- data/lib/grim.rb +15 -75
- data/lib/grim/page.rb +50 -0
- data/lib/grim/pdf.rb +60 -0
- data/spec/lib/grim/page_spec.rb +36 -0
- data/spec/lib/grim/pdf_spec.rb +48 -0
- data/spec/lib/grim_spec.rb +12 -82
- metadata +29 -7
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Jonathan Hoyt
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.textile
CHANGED
@@ -1,17 +1,34 @@
|
|
1
1
|
h1. Grim
|
2
2
|
|
3
|
-
Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
3
|
+
Grim is a simple gem for extracting (reaping) a page from a pdf and converting it to an image, as well as extracting the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
4
4
|
|
5
5
|
h2. Prerequisites
|
6
6
|
|
7
|
-
You will need ghostscript, imagemagick, and xpdf installed. On the Mac (OSX) I highly recommend using "Homebrew":http://mxcl.github.com/homebrew/ to get them installed
|
7
|
+
You will need ghostscript, imagemagick, and xpdf installed. On the Mac (OSX) I highly recommend using "Homebrew":http://mxcl.github.com/homebrew/ to get them installed.
|
8
|
+
|
9
|
+
<pre><code>
|
10
|
+
brew install ghostscript imagemagick xpdf
|
11
|
+
</code></pre>
|
12
|
+
|
13
|
+
h2. Installation
|
14
|
+
|
15
|
+
<pre><code>
|
16
|
+
gem install grim
|
17
|
+
</code></pre>
|
8
18
|
|
9
19
|
h2. Usage
|
10
20
|
|
11
21
|
<pre><code>
|
12
|
-
|
13
|
-
|
14
|
-
png
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
pdf = Grim.reap("/path/to/pdf") # returns Grim::Pdf instance for pdf
|
23
|
+
count = pdf.count # returns the number of pages in the pdf
|
24
|
+
png = pdf[3].save('/path/to/image.png') # will return true if page was saved or false if not
|
25
|
+
text = pdf[3].text # returns text as a String
|
26
|
+
|
27
|
+
pdf.each do |page|
|
28
|
+
puts page.text
|
29
|
+
end
|
30
|
+
</code></pre>
|
31
|
+
|
32
|
+
h2. License
|
33
|
+
|
34
|
+
See LICENSE for details.
|
data/grim.gemspec
CHANGED
@@ -7,11 +7,12 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Grim::VERSION
|
8
8
|
s.authors = ["Jonathan Hoyt"]
|
9
9
|
s.email = ["jonmagic@gmail.com"]
|
10
|
-
s.homepage = ""
|
10
|
+
s.homepage = "http://github.com/jonmagic/grim"
|
11
11
|
s.summary = %q{Extract slides and text from a PDF.}
|
12
12
|
s.description = %q{Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.}
|
13
13
|
|
14
14
|
s.rubyforge_project = "grim"
|
15
|
+
s.add_dependency 'safe_shell', '~> 1.0.0'
|
15
16
|
|
16
17
|
s.files = `git ls-files`.split("\n")
|
17
18
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/grim.rb
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# For example:
|
5
|
-
#
|
6
|
-
# instance = Grim.new("/path/to/pdf")
|
7
|
-
# page_count = instance.page_count
|
8
|
-
# png = instance.page(1).to_png("/path/to/save/png")
|
9
|
-
# jpeg = instance.page(2).to_jpeg("/path/to/save/jpeg")
|
10
|
-
# text = instance.page(3).text
|
11
|
-
#
|
12
|
-
class Grim
|
1
|
+
require 'safe_shell'
|
2
|
+
|
3
|
+
module Grim
|
13
4
|
# VERSION
|
14
|
-
VERSION = "0.1.0"
|
5
|
+
VERSION = "0.2.0"
|
15
6
|
|
16
7
|
# Default resize output width, any positive integer
|
17
8
|
WIDTH = 1024
|
@@ -30,75 +21,24 @@ class Grim
|
|
30
21
|
class PdfNotFound < Grim::Exception
|
31
22
|
end
|
32
23
|
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
# initialize is called when a new instance is created and accepts path.
|
37
|
-
def initialize(path)
|
38
|
-
raise Grim::PdfNotFound unless File.exists?(path)
|
39
|
-
@page_number = 1
|
40
|
-
@path = path
|
24
|
+
# Exception that is raised if pdf does not have page
|
25
|
+
class PageNotFound < Grim::Exception
|
41
26
|
end
|
42
27
|
|
43
|
-
#
|
44
|
-
# to read the pdf with the pdf_info.ps script as a filter,
|
45
|
-
# returning the number of pages in the pdf as an integer.
|
46
|
-
#
|
47
|
-
# For example:
|
48
|
-
#
|
49
|
-
# instance.page_count
|
50
|
-
# => 4
|
28
|
+
# Creates and returns a new instance of Grim::Pdf
|
51
29
|
#
|
52
|
-
#
|
53
|
-
def page_count
|
54
|
-
@page_count ||= begin
|
55
|
-
`gs -dNODISPLAY -q -sFile=#{@path} ./lib/pdf_info.ps`.to_i
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# page just sets the page attribute on the instance.
|
30
|
+
# path - a path string or object
|
60
31
|
#
|
61
32
|
# For example:
|
62
33
|
#
|
63
|
-
#
|
64
|
-
# => instance
|
34
|
+
# pdf = Grim.reap("/path/to/pdf")
|
65
35
|
#
|
66
|
-
# Returns
|
67
|
-
def page(number)
|
68
|
-
@page_number = number
|
69
|
-
self
|
70
|
-
end
|
71
|
-
|
72
|
-
# Returns page_number minus 1
|
73
|
-
def index
|
74
|
-
@page_number - 1
|
75
|
-
end
|
76
|
-
|
77
|
-
# to_image extracts the selected page and turns it into an image.
|
78
|
-
# Tested on png and jpeg.
|
79
|
-
#
|
80
|
-
# For example:
|
36
|
+
# Returns an instance of Grim::Pdf
|
81
37
|
#
|
82
|
-
|
83
|
-
|
84
|
-
#
|
85
|
-
# Returns an instance of File
|
86
|
-
def to_image(path)
|
87
|
-
`convert -resize #{Grim::WIDTH} -antialias -render -quality #{Grim::QUALITY} -colorspace RGB -interlace none -density #{Grim::DENSITY} #{@path}[#{index}] #{path}`
|
88
|
-
file = File.open(path)
|
89
|
-
file.rewind
|
90
|
-
file
|
38
|
+
def self.reap(path)
|
39
|
+
Grim::Pdf.new(path)
|
91
40
|
end
|
41
|
+
end
|
92
42
|
|
93
|
-
|
94
|
-
|
95
|
-
# For example:
|
96
|
-
#
|
97
|
-
# instance.page(2).text
|
98
|
-
# => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
99
|
-
#
|
100
|
-
# Returns a string
|
101
|
-
def text
|
102
|
-
`pdftotext -enc UTF-8 -f #{@page_number} -l #{@page_number} #{@path} -`
|
103
|
-
end
|
104
|
-
end
|
43
|
+
require 'grim/pdf'
|
44
|
+
require 'grim/page'
|
data/lib/grim/page.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
module Grim
|
2
|
+
class Page
|
3
|
+
|
4
|
+
attr_reader :number
|
5
|
+
|
6
|
+
# Sets up some instance variables on new instance.
|
7
|
+
#
|
8
|
+
# pdf - the pdf this page belongs to
|
9
|
+
# index - the index of the page in the array of pages
|
10
|
+
#
|
11
|
+
def initialize(pdf, index)
|
12
|
+
@pdf = pdf
|
13
|
+
@index = index
|
14
|
+
@number = index + 1
|
15
|
+
end
|
16
|
+
|
17
|
+
# Extracts the selected page and turns it into an image.
|
18
|
+
# Tested on png and jpeg.
|
19
|
+
#
|
20
|
+
# path - String of the path to save to
|
21
|
+
#
|
22
|
+
# For example:
|
23
|
+
#
|
24
|
+
# pdf[1].save("/path/to/save/image.png")
|
25
|
+
# # => true
|
26
|
+
#
|
27
|
+
# Returns true if a file exists at path after the conversion, false otherwise.
|
28
|
+
#
|
29
|
+
def save(path)
|
30
|
+
SafeShell.execute("convert", "-resize", Grim::WIDTH, "-antialias", "-render",
|
31
|
+
"-quality", Grim::QUALITY, "-colorspace", "RGB",
|
32
|
+
"-interlace", "none", "-density", Grim::DENSITY,
|
33
|
+
"#{@pdf.path}[#{@index}]", path)
|
34
|
+
File.exists?(path)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Extracts the text from the selected page.
|
38
|
+
#
|
39
|
+
# For example:
|
40
|
+
#
|
41
|
+
# pdf[1].text
|
42
|
+
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
43
|
+
#
|
44
|
+
# Returns a String.
|
45
|
+
#
|
46
|
+
def text
|
47
|
+
SafeShell.execute("pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, @pdf.path, "-")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/grim/pdf.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
module Grim
|
2
|
+
class Pdf
|
3
|
+
include Enumerable
|
4
|
+
|
5
|
+
attr_reader :path
|
6
|
+
|
7
|
+
# ghostscript prints out a warning, this regex matches it
|
8
|
+
WarningRegex = /\*\*\*\*.*\n/
|
9
|
+
|
10
|
+
# Raises an error if pdf not found and sets some instance
|
11
|
+
# variables if pdf is found.
|
12
|
+
#
|
13
|
+
# path - A String or Path to the pdf
|
14
|
+
#
|
15
|
+
def initialize(path)
|
16
|
+
raise Grim::PdfNotFound unless File.exists?(path)
|
17
|
+
@path = path
|
18
|
+
end
|
19
|
+
|
20
|
+
# Shells out to ghostscript to read the pdf with the pdf_info.ps script
|
21
|
+
# as a filter, returning the number of pages in the pdf as an integer.
|
22
|
+
#
|
23
|
+
# For example:
|
24
|
+
#
|
25
|
+
# pdf.count
|
26
|
+
# # => 4
|
27
|
+
#
|
28
|
+
# Returns an Integer.
|
29
|
+
#
|
30
|
+
def count
|
31
|
+
@count ||= begin
|
32
|
+
result = SafeShell.execute("gs", "-dNODISPLAY", "-q", "-sFile=#{@path}", "./lib/pdf_info.ps")
|
33
|
+
|
34
|
+
result.gsub(WarningRegex, '').to_i
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Creates an instance Grim::Page for the index passed in.
|
39
|
+
#
|
40
|
+
# index - accepts Integer for position in array
|
41
|
+
#
|
42
|
+
# For example:
|
43
|
+
#
|
44
|
+
# pdf[4] # returns 5th page
|
45
|
+
#
|
46
|
+
# Returns an instance of Grim::Page.
|
47
|
+
#
|
48
|
+
def [](index)
|
49
|
+
raise Grim::PageNotFound unless index >= 0 && index < count
|
50
|
+
Grim::Page.new(self, index)
|
51
|
+
end
|
52
|
+
|
53
|
+
def each
|
54
|
+
(0..(count-1)).each do |index|
|
55
|
+
yield Grim::Page.new(self, index)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Grim::Page do
|
5
|
+
after(:all) do
|
6
|
+
FileUtils.rm_rf(tmp_dir)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have number" do
|
10
|
+
Grim::Page.new(Grim::Pdf.new(fixture_path("smoker.pdf")), 1).number.should == 2
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#save" do
|
14
|
+
before(:all) do
|
15
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
16
|
+
pdf[0].save(tmp_path("to_png_spec.png"))
|
17
|
+
@file = File.open(tmp_path("to_png_spec.png"))
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should create the file" do
|
21
|
+
File.exist?(tmp_path("to_png_spec.png")).should be_true
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should have the right file size" do
|
25
|
+
@file.stat.size.should == 188515
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
describe "#text" do
|
31
|
+
it "should return the text from the selected page" do
|
32
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
33
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72\342\200\235 wide\n\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Grim::Pdf do
|
4
|
+
|
5
|
+
it "should have a path" do
|
6
|
+
Grim::Pdf.new(fixture_path("smoker.pdf")).path.should == fixture_path("smoker.pdf")
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#initialize" do
|
10
|
+
it "should raise an error if pdf does not exist" do
|
11
|
+
lambda { Grim::Pdf.new(fixture_path("booboo.pdf")) }.should raise_error(Grim::PdfNotFound)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should set path on pdf" do
|
15
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
16
|
+
pdf.path.should == fixture_path("smoker.pdf")
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "#count" do
|
21
|
+
it "should return 25" do
|
22
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
23
|
+
pdf.count.should == 25
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "#[]" do
|
28
|
+
before(:each) do
|
29
|
+
@pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should raise Grim::PageDoesNotExist if page doesn't exist" do
|
33
|
+
lambda { @pdf[25] }.should raise_error(Grim::PageNotFound)
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should return an instance of Grim::Page if page exists" do
|
37
|
+
@pdf[24].class.should == Grim::Page
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#each" do
|
42
|
+
it "should be iterable" do
|
43
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
44
|
+
pdf.map {|p| p.number }.should == (1..25).to_a
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
data/spec/lib/grim_spec.rb
CHANGED
@@ -1,95 +1,25 @@
|
|
1
|
-
require 'fileutils'
|
2
1
|
require 'spec_helper'
|
3
2
|
|
4
3
|
describe Grim do
|
5
|
-
after(:all) do
|
6
|
-
FileUtils.rm_rf(tmp_dir)
|
7
|
-
end
|
8
|
-
|
9
4
|
it "should have a VERSION constant" do
|
10
5
|
Grim.const_defined?('VERSION').should be_true
|
11
6
|
end
|
12
7
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
lambda { Grim.new(fixture_path("booboo.pdf")) }.should raise_error(Grim::PdfNotFound)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
describe "#page_count" do
|
21
|
-
it "should return an integer" do
|
22
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
23
|
-
instance.page_count.should == 25
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
describe "#page" do
|
28
|
-
it "should be set to 1 by default" do
|
29
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
30
|
-
instance.page_number.should == 1
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should set page attribute and return instance" do
|
34
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
35
|
-
instance.page(2).should == instance
|
36
|
-
instance.page_number.should == 2
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
describe "#index" do
|
41
|
-
it "should return page minus 1" do
|
42
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
43
|
-
instance.page(2)
|
44
|
-
instance.index.should == 1
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
describe "#to_image" do
|
49
|
-
describe "output png" do
|
50
|
-
before(:all) do
|
51
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
52
|
-
@png = instance.to_image(tmp_path("to_png_spec.png"))
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should create the file" do
|
56
|
-
File.exist?(tmp_path("to_png_spec.png")).should be_true
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should return an instance of File" do
|
60
|
-
@png.class.should == File
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should have the right file size" do
|
64
|
-
@png.stat.size.should == 188515
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
describe "output jpeg" do
|
69
|
-
before(:all) do
|
70
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
71
|
-
@jpeg = instance.to_image(tmp_path("to_jpeg_spec.jpeg"))
|
72
|
-
end
|
73
|
-
|
74
|
-
it "should create the file" do
|
75
|
-
File.exist?(tmp_path("to_jpeg_spec.jpeg")).should be_true
|
76
|
-
end
|
8
|
+
it "should have WIDTH constant set to 1024" do
|
9
|
+
Grim::WIDTH.should == 1024
|
10
|
+
end
|
77
11
|
|
78
|
-
|
79
|
-
|
80
|
-
|
12
|
+
it "should have QUALITY constant set to 90" do
|
13
|
+
Grim::QUALITY.should == 90
|
14
|
+
end
|
81
15
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
end
|
16
|
+
it "should have DENSITY constant set to 300" do
|
17
|
+
Grim::DENSITY.should == 300
|
18
|
+
end
|
87
19
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
instance.page(2).text.should == "Step 1: get someone to print this curve for you to scale, 72\342\200\235 wide\n\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
92
|
-
end
|
20
|
+
describe "#new" do
|
21
|
+
it "should return an instance of Grim::Pdf" do
|
22
|
+
Grim.reap(fixture_path("smoker.pdf")).class.should == Grim::Pdf
|
93
23
|
end
|
94
24
|
end
|
95
25
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jonathan Hoyt
|
@@ -15,10 +15,25 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-09-
|
18
|
+
date: 2011-09-06 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
|
-
dependencies:
|
21
|
-
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: safe_shell
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 0
|
33
|
+
- 0
|
34
|
+
version: 1.0.0
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
22
37
|
description: Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
23
38
|
email:
|
24
39
|
- jonmagic@gmail.com
|
@@ -31,16 +46,21 @@ extra_rdoc_files: []
|
|
31
46
|
files:
|
32
47
|
- .gitignore
|
33
48
|
- Gemfile
|
49
|
+
- LICENSE
|
34
50
|
- README.textile
|
35
51
|
- Rakefile
|
36
52
|
- grim.gemspec
|
37
53
|
- lib/grim.rb
|
54
|
+
- lib/grim/page.rb
|
55
|
+
- lib/grim/pdf.rb
|
38
56
|
- lib/pdf_info.ps
|
39
57
|
- spec/fixtures/smoker.pdf
|
58
|
+
- spec/lib/grim/page_spec.rb
|
59
|
+
- spec/lib/grim/pdf_spec.rb
|
40
60
|
- spec/lib/grim_spec.rb
|
41
61
|
- spec/spec_helper.rb
|
42
62
|
has_rdoc: true
|
43
|
-
homepage:
|
63
|
+
homepage: http://github.com/jonmagic/grim
|
44
64
|
licenses: []
|
45
65
|
|
46
66
|
post_install_message:
|
@@ -75,5 +95,7 @@ specification_version: 3
|
|
75
95
|
summary: Extract slides and text from a PDF.
|
76
96
|
test_files:
|
77
97
|
- spec/fixtures/smoker.pdf
|
98
|
+
- spec/lib/grim/page_spec.rb
|
99
|
+
- spec/lib/grim/pdf_spec.rb
|
78
100
|
- spec/lib/grim_spec.rb
|
79
101
|
- spec/spec_helper.rb
|