grim 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.textile +25 -8
- data/grim.gemspec +2 -1
- data/lib/grim.rb +15 -75
- data/lib/grim/page.rb +50 -0
- data/lib/grim/pdf.rb +60 -0
- data/spec/lib/grim/page_spec.rb +36 -0
- data/spec/lib/grim/pdf_spec.rb +48 -0
- data/spec/lib/grim_spec.rb +12 -82
- metadata +29 -7
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Jonathan Hoyt
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.textile
CHANGED
@@ -1,17 +1,34 @@
|
|
1
1
|
h1. Grim
|
2
2
|
|
3
|
-
Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
3
|
+
Grim is a simple gem for extracting (reaping) a page from a pdf and converting it to an image as well as extracting the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
4
4
|
|
5
5
|
h2. Prerequisites
|
6
6
|
|
7
|
-
You will need ghostscript, imagemagick, and xpdf installed. On the Mac (OSX) I highly recommend using "Homebrew":http://mxcl.github.com/homebrew/ to get them installed
|
7
|
+
You will need ghostscript, imagemagick, and xpdf installed. On the Mac (OSX) I highly recommend using "Homebrew":http://mxcl.github.com/homebrew/ to get them installed.
|
8
|
+
|
9
|
+
<pre><code>
|
10
|
+
brew install ghostscript imagemagick xpdf
|
11
|
+
</code></pre>
|
12
|
+
|
13
|
+
h2. Installation
|
14
|
+
|
15
|
+
<pre><code>
|
16
|
+
gem install grim
|
17
|
+
</code></pre>
|
8
18
|
|
9
19
|
h2. Usage
|
10
20
|
|
11
21
|
<pre><code>
|
12
|
-
|
13
|
-
|
14
|
-
png
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
pdf = Grim.reap("/path/to/pdf") # returns Grim::Pdf instance for pdf
|
23
|
+
count = pdf.count # returns the number of pages in the pdf
|
24
|
+
png = pdf[3].save('/path/to/image.png') # will return true if page was saved or false if not
|
25
|
+
text = pdf[3].text # returns text as a String
|
26
|
+
|
27
|
+
pdf.each do |page|
|
28
|
+
puts page.text
|
29
|
+
end
|
30
|
+
</code></pre>
|
31
|
+
|
32
|
+
h2. License
|
33
|
+
|
34
|
+
See LICENSE for details.
|
data/grim.gemspec
CHANGED
@@ -7,11 +7,12 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Grim::VERSION
|
8
8
|
s.authors = ["Jonathan Hoyt"]
|
9
9
|
s.email = ["jonmagic@gmail.com"]
|
10
|
-
s.homepage = ""
|
10
|
+
s.homepage = "http://github.com/jonmagic/grim"
|
11
11
|
s.summary = %q{Extract slides and text from a PDF.}
|
12
12
|
s.description = %q{Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.}
|
13
13
|
|
14
14
|
s.rubyforge_project = "grim"
|
15
|
+
s.add_dependency 'safe_shell', '~> 1.0.0'
|
15
16
|
|
16
17
|
s.files = `git ls-files`.split("\n")
|
17
18
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/lib/grim.rb
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# For example:
|
5
|
-
#
|
6
|
-
# instance = Grim.new("/path/to/pdf")
|
7
|
-
# page_count = instance.page_count
|
8
|
-
# png = instance.page(1).to_png("/path/to/save/png")
|
9
|
-
# jpeg = instance.page(2).to_jpeg("/path/to/save/jpeg")
|
10
|
-
# text = instance.page(3).text
|
11
|
-
#
|
12
|
-
class Grim
|
1
|
+
require 'safe_shell'
|
2
|
+
|
3
|
+
module Grim
|
13
4
|
# VERSION
|
14
|
-
VERSION = "0.
|
5
|
+
VERSION = "0.2.0"
|
15
6
|
|
16
7
|
# Default resize output width, any positive integer
|
17
8
|
WIDTH = 1024
|
@@ -30,75 +21,24 @@ class Grim
|
|
30
21
|
class PdfNotFound < Grim::Exception
|
31
22
|
end
|
32
23
|
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
# initialize is called when a new instance is created and accepts path.
|
37
|
-
def initialize(path)
|
38
|
-
raise Grim::PdfNotFound unless File.exists?(path)
|
39
|
-
@page_number = 1
|
40
|
-
@path = path
|
24
|
+
# Exception that is raised if pdf does not have page
|
25
|
+
class PageNotFound < Grim::Exception
|
41
26
|
end
|
42
27
|
|
43
|
-
#
|
44
|
-
# to read the pdf with the pdf_info.ps script as a filter,
|
45
|
-
# returning the number of pages in the pdf as an integer.
|
46
|
-
#
|
47
|
-
# For example:
|
48
|
-
#
|
49
|
-
# instance.page_count
|
50
|
-
# => 4
|
28
|
+
# Creates and returns a new instance of Grim::Pdf
|
51
29
|
#
|
52
|
-
#
|
53
|
-
def page_count
|
54
|
-
@page_count ||= begin
|
55
|
-
`gs -dNODISPLAY -q -sFile=#{@path} ./lib/pdf_info.ps`.to_i
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# page just sets the page attribute on the instance.
|
30
|
+
# path - a path string or object
|
60
31
|
#
|
61
32
|
# For example:
|
62
33
|
#
|
63
|
-
#
|
64
|
-
# => instance
|
34
|
+
# pdf = Grim.reap("/path/to/pdf")
|
65
35
|
#
|
66
|
-
# Returns
|
67
|
-
def page(number)
|
68
|
-
@page_number = number
|
69
|
-
self
|
70
|
-
end
|
71
|
-
|
72
|
-
# Returns page_number minus 1
|
73
|
-
def index
|
74
|
-
@page_number - 1
|
75
|
-
end
|
76
|
-
|
77
|
-
# to_image extracts the selected page and turns it into an image.
|
78
|
-
# Tested on png and jpeg.
|
79
|
-
#
|
80
|
-
# For example:
|
36
|
+
# Returns an instance of Grim::Pdf
|
81
37
|
#
|
82
|
-
|
83
|
-
|
84
|
-
#
|
85
|
-
# Returns an instance of File
|
86
|
-
def to_image(path)
|
87
|
-
`convert -resize #{Grim::WIDTH} -antialias -render -quality #{Grim::QUALITY} -colorspace RGB -interlace none -density #{Grim::DENSITY} #{@path}[#{index}] #{path}`
|
88
|
-
file = File.open(path)
|
89
|
-
file.rewind
|
90
|
-
file
|
38
|
+
def self.reap(path)
|
39
|
+
Grim::Pdf.new(path)
|
91
40
|
end
|
41
|
+
end
|
92
42
|
|
93
|
-
|
94
|
-
|
95
|
-
# For example:
|
96
|
-
#
|
97
|
-
# instance.page(2).text
|
98
|
-
# => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
99
|
-
#
|
100
|
-
# Returns a string
|
101
|
-
def text
|
102
|
-
`pdftotext -enc UTF-8 -f #{@page_number} -l #{@page_number} #{@path} -`
|
103
|
-
end
|
104
|
-
end
|
43
|
+
require 'grim/pdf'
|
44
|
+
require 'grim/page'
|
data/lib/grim/page.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
module Grim
|
2
|
+
class Page
|
3
|
+
|
4
|
+
attr_reader :number
|
5
|
+
|
6
|
+
# Sets up some instance variables on new instance.
|
7
|
+
#
|
8
|
+
# pdf - the pdf this page belongs to
|
9
|
+
# index - the index of the page in the array of pages
|
10
|
+
#
|
11
|
+
def initialize(pdf, index)
|
12
|
+
@pdf = pdf
|
13
|
+
@index = index
|
14
|
+
@number = index + 1
|
15
|
+
end
|
16
|
+
|
17
|
+
# Extracts the selected page and turns it into an image.
|
18
|
+
# Tested on png and jpeg.
|
19
|
+
#
|
20
|
+
# path - String of the path to save to
|
21
|
+
#
|
22
|
+
# For example:
|
23
|
+
#
|
24
|
+
# pdf[1].save("/path/to/save/image.png")
|
25
|
+
# # => true
|
26
|
+
#
|
27
|
+
# Returns true if the image file exists after conversion, false otherwise.
|
28
|
+
#
|
29
|
+
def save(path)
|
30
|
+
SafeShell.execute("convert", "-resize", Grim::WIDTH, "-antialias", "-render",
|
31
|
+
"-quality", Grim::QUALITY, "-colorspace", "RGB",
|
32
|
+
"-interlace", "none", "-density", Grim::DENSITY,
|
33
|
+
"#{@pdf.path}[#{@index}]", path)
|
34
|
+
File.exists?(path)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Extracts the text from the selected page.
|
38
|
+
#
|
39
|
+
# For example:
|
40
|
+
#
|
41
|
+
# pdf[1].text
|
42
|
+
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
43
|
+
#
|
44
|
+
# Returns a String.
|
45
|
+
#
|
46
|
+
def text
|
47
|
+
SafeShell.execute("pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, @pdf.path, "-")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/grim/pdf.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
module Grim
|
2
|
+
class Pdf
|
3
|
+
include Enumerable
|
4
|
+
|
5
|
+
attr_reader :path
|
6
|
+
|
7
|
+
# ghostscript prints out a warning, this regex matches it
|
8
|
+
WarningRegex = /\*\*\*\*.*\n/
|
9
|
+
|
10
|
+
# Raises an error if pdf not found and sets some instance
|
11
|
+
# variables if pdf is found.
|
12
|
+
#
|
13
|
+
# path - A String or Path to the pdf
|
14
|
+
#
|
15
|
+
def initialize(path)
|
16
|
+
raise Grim::PdfNotFound unless File.exists?(path)
|
17
|
+
@path = path
|
18
|
+
end
|
19
|
+
|
20
|
+
# Shells out to ghostscript to read the pdf with the pdf_info.ps script
|
21
|
+
# as a filter, returning the number of pages in the pdf as an integer.
|
22
|
+
#
|
23
|
+
# For example:
|
24
|
+
#
|
25
|
+
# pdf.count
|
26
|
+
# # => 4
|
27
|
+
#
|
28
|
+
# Returns an Integer.
|
29
|
+
#
|
30
|
+
def count
|
31
|
+
@count ||= begin
|
32
|
+
result = SafeShell.execute("gs", "-dNODISPLAY", "-q", "-sFile=#{@path}", "./lib/pdf_info.ps")
|
33
|
+
|
34
|
+
result.gsub(WarningRegex, '').to_i
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Creates an instance of Grim::Page for the index passed in.
|
39
|
+
#
|
40
|
+
# index - accepts Integer for position in array
|
41
|
+
#
|
42
|
+
# For example:
|
43
|
+
#
|
44
|
+
# pdf[4] # returns 5th page
|
45
|
+
#
|
46
|
+
# Returns an instance of Grim::Page.
|
47
|
+
#
|
48
|
+
def [](index)
|
49
|
+
raise Grim::PageNotFound unless index >= 0 && index < count
|
50
|
+
Grim::Page.new(self, index)
|
51
|
+
end
|
52
|
+
|
53
|
+
def each
|
54
|
+
(0..(count-1)).each do |index|
|
55
|
+
yield Grim::Page.new(self, index)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Grim::Page do
|
5
|
+
after(:all) do
|
6
|
+
FileUtils.rm_rf(tmp_dir)
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have number" do
|
10
|
+
Grim::Page.new(Grim::Pdf.new(fixture_path("smoker.pdf")), 1).number.should == 2
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "#save" do
|
14
|
+
before(:all) do
|
15
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
16
|
+
pdf[0].save(tmp_path("to_png_spec.png"))
|
17
|
+
@file = File.open(tmp_path("to_png_spec.png"))
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should create the file" do
|
21
|
+
File.exist?(tmp_path("to_png_spec.png")).should be_true
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should have the right file size" do
|
25
|
+
@file.stat.size.should == 188515
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
describe "#text" do
|
31
|
+
it "should return the text from the selected page" do
|
32
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
33
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72\342\200\235 wide\n\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Grim::Pdf do
|
4
|
+
|
5
|
+
it "should have a path" do
|
6
|
+
Grim::Pdf.new(fixture_path("smoker.pdf")).path.should == fixture_path("smoker.pdf")
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#initialize" do
|
10
|
+
it "should raise an error if pdf does not exist" do
|
11
|
+
lambda { Grim::Pdf.new(fixture_path("booboo.pdf")) }.should raise_error(Grim::PdfNotFound)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should set path on pdf" do
|
15
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
16
|
+
pdf.path.should == fixture_path("smoker.pdf")
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "#count" do
|
21
|
+
it "should return 25" do
|
22
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
23
|
+
pdf.count.should == 25
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "#[]" do
|
28
|
+
before(:each) do
|
29
|
+
@pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should raise Grim::PageDoesNotExist if page doesn't exist" do
|
33
|
+
lambda { @pdf[25] }.should raise_error(Grim::PageNotFound)
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should return an instance of Grim::Page if page exists" do
|
37
|
+
@pdf[24].class.should == Grim::Page
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#each" do
|
42
|
+
it "should be iterable" do
|
43
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
44
|
+
pdf.map {|p| p.number }.should == (1..25).to_a
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
data/spec/lib/grim_spec.rb
CHANGED
@@ -1,95 +1,25 @@
|
|
1
|
-
require 'fileutils'
|
2
1
|
require 'spec_helper'
|
3
2
|
|
4
3
|
describe Grim do
|
5
|
-
after(:all) do
|
6
|
-
FileUtils.rm_rf(tmp_dir)
|
7
|
-
end
|
8
|
-
|
9
4
|
it "should have a VERSION constant" do
|
10
5
|
Grim.const_defined?('VERSION').should be_true
|
11
6
|
end
|
12
7
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
lambda { Grim.new(fixture_path("booboo.pdf")) }.should raise_error(Grim::PdfNotFound)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
describe "#page_count" do
|
21
|
-
it "should return an integer" do
|
22
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
23
|
-
instance.page_count.should == 25
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
describe "#page" do
|
28
|
-
it "should be set to 1 by default" do
|
29
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
30
|
-
instance.page_number.should == 1
|
31
|
-
end
|
32
|
-
|
33
|
-
it "should set page attribute and return instance" do
|
34
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
35
|
-
instance.page(2).should == instance
|
36
|
-
instance.page_number.should == 2
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
describe "#index" do
|
41
|
-
it "should return page minus 1" do
|
42
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
43
|
-
instance.page(2)
|
44
|
-
instance.index.should == 1
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
describe "#to_image" do
|
49
|
-
describe "output png" do
|
50
|
-
before(:all) do
|
51
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
52
|
-
@png = instance.to_image(tmp_path("to_png_spec.png"))
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should create the file" do
|
56
|
-
File.exist?(tmp_path("to_png_spec.png")).should be_true
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should return an instance of File" do
|
60
|
-
@png.class.should == File
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should have the right file size" do
|
64
|
-
@png.stat.size.should == 188515
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
describe "output jpeg" do
|
69
|
-
before(:all) do
|
70
|
-
instance = Grim.new(fixture_path("smoker.pdf"))
|
71
|
-
@jpeg = instance.to_image(tmp_path("to_jpeg_spec.jpeg"))
|
72
|
-
end
|
73
|
-
|
74
|
-
it "should create the file" do
|
75
|
-
File.exist?(tmp_path("to_jpeg_spec.jpeg")).should be_true
|
76
|
-
end
|
8
|
+
it "should have WIDTH constant set to 1024" do
|
9
|
+
Grim::WIDTH.should == 1024
|
10
|
+
end
|
77
11
|
|
78
|
-
|
79
|
-
|
80
|
-
|
12
|
+
it "should have QUALITY constant set to 90" do
|
13
|
+
Grim::QUALITY.should == 90
|
14
|
+
end
|
81
15
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
end
|
16
|
+
it "should have DENSITY constant set to 300" do
|
17
|
+
Grim::DENSITY.should == 300
|
18
|
+
end
|
87
19
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
instance.page(2).text.should == "Step 1: get someone to print this curve for you to scale, 72\342\200\235 wide\n\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
92
|
-
end
|
20
|
+
describe "#new" do
|
21
|
+
it "should return an instance of Grim::Pdf" do
|
22
|
+
Grim.reap(fixture_path("smoker.pdf")).class.should == Grim::Pdf
|
93
23
|
end
|
94
24
|
end
|
95
25
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jonathan Hoyt
|
@@ -15,10 +15,25 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-09-
|
18
|
+
date: 2011-09-06 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
|
-
dependencies:
|
21
|
-
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: safe_shell
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 0
|
33
|
+
- 0
|
34
|
+
version: 1.0.0
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
22
37
|
description: Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
|
23
38
|
email:
|
24
39
|
- jonmagic@gmail.com
|
@@ -31,16 +46,21 @@ extra_rdoc_files: []
|
|
31
46
|
files:
|
32
47
|
- .gitignore
|
33
48
|
- Gemfile
|
49
|
+
- LICENSE
|
34
50
|
- README.textile
|
35
51
|
- Rakefile
|
36
52
|
- grim.gemspec
|
37
53
|
- lib/grim.rb
|
54
|
+
- lib/grim/page.rb
|
55
|
+
- lib/grim/pdf.rb
|
38
56
|
- lib/pdf_info.ps
|
39
57
|
- spec/fixtures/smoker.pdf
|
58
|
+
- spec/lib/grim/page_spec.rb
|
59
|
+
- spec/lib/grim/pdf_spec.rb
|
40
60
|
- spec/lib/grim_spec.rb
|
41
61
|
- spec/spec_helper.rb
|
42
62
|
has_rdoc: true
|
43
|
-
homepage:
|
63
|
+
homepage: http://github.com/jonmagic/grim
|
44
64
|
licenses: []
|
45
65
|
|
46
66
|
post_install_message:
|
@@ -75,5 +95,7 @@ specification_version: 3
|
|
75
95
|
summary: Extract slides and text from a PDF.
|
76
96
|
test_files:
|
77
97
|
- spec/fixtures/smoker.pdf
|
98
|
+
- spec/lib/grim/page_spec.rb
|
99
|
+
- spec/lib/grim/pdf_spec.rb
|
78
100
|
- spec/lib/grim_spec.rb
|
79
101
|
- spec/spec_helper.rb
|