grim 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -48,6 +48,21 @@ h2. Usage
48
48
  end
49
49
  </pre></code>
50
50
 
51
+ We also support using other processors (the default is whatever version of ImageMagick/Ghostscript is in your path).
52
+
53
+ <pre><code>
54
+ # specifying one processor with specific ImageMagick and GhostScript paths
55
+ Grim.processor = Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/convert", :ghostscript_path => "/path/to/gs"})
56
+
57
+ # multiple processors with fallback if first fails, useful if you need multiple versions of convert/gs
58
+ Grim.processor = Grim::MultiProcessor.new([
59
+ Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/6.7/convert", :ghostscript_path => "/path/to/9.04/gs"}),
60
+ Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/6.6/convert", :ghostscript_path => "/path/to/9.02/gs"})
61
+ ])
62
+
63
+ pdf = Grim.reap('/path/to/pdf')
64
+ </code></pre>
65
+
51
66
  h2. License
52
67
 
53
68
  See LICENSE for details.
data/grim.gemspec CHANGED
@@ -12,7 +12,6 @@ Gem::Specification.new do |s|
12
12
  s.description = %q{Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.}
13
13
 
14
14
  s.rubyforge_project = "grim"
15
- s.add_dependency 'safe_shell', '~> 1.0.0'
16
15
 
17
16
  s.files = `git ls-files`.split("\n")
18
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
data/lib/grim.rb CHANGED
@@ -1,7 +1,10 @@
1
1
  # encoding: UTF-8
2
- require 'safe_shell'
2
+ require 'shellwords'
3
3
 
4
4
  module Grim
5
+ extend self
6
+ attr_accessor :processor
7
+
5
8
  # Default resize output width, any positive integer
6
9
  WIDTH = 1024
7
10
 
@@ -47,4 +50,8 @@ module Grim
47
50
  end
48
51
 
49
52
  require 'grim/pdf'
50
- require 'grim/page'
53
+ require 'grim/page'
54
+ require 'grim/image_magick_processor'
55
+ require 'grim/multi_processor'
56
+
57
+ Grim.processor = Grim::ImageMagickProcessor.new
@@ -0,0 +1,37 @@
1
module Grim
  # Processor that shells out to ImageMagick's `convert` to render a PDF page
  # to an image, and to Ghostscript's `gs` to count the pages of a PDF.
  #
  # Every argument handed to the shell is escaped individually, so paths that
  # contain spaces, quotes, glob characters or other shell metacharacters are
  # passed through to the external program unchanged.
  class ImageMagickProcessor

    # ghostscript prints out a warning, this regex matches it
    WarningRegex = /\*\*\*\*.*\n/

    # options - Hash of optional executable locations (default: {}):
    #           :imagemagick_path - the convert executable (default: 'convert').
    #           :ghostscript_path - the gs executable (default: 'gs' for
    #                               counting; for saving, nothing is added to
    #                               PATH when it is not given).
    def initialize(options={})
      @imagemagick_path = options[:imagemagick_path] || 'convert'
      @ghostscript_path = options[:ghostscript_path]
    end

    # Counts the pages of a PDF by running the bundled pdf_info.ps script
    # through Ghostscript.
    #
    # path - String path of the PDF file.
    #
    # Returns an Integer: the command's stdout, stripped of Ghostscript
    # warning lines, converted with String#to_i (so 0 when the output does not
    # start with a number).
    def count(path)
      command = [@ghostscript_path || 'gs', "-dNODISPLAY", "-q",
        "-sFile=#{path}",
        File.expand_path('../../../lib/pdf_info.ps', __FILE__)]
      result = `#{shell_join(command)}`
      result.gsub(WarningRegex, '').to_i
    end

    # Renders one page of a PDF to an image file using convert.
    #
    # pdf     - Object responding to #path (the source PDF).
    # index   - Integer zero-based page index passed to convert as "[index]".
    # path    - String path of the image file to write.
    # options - Hash of rendering options:
    #           :width   - defaults to Grim::WIDTH
    #           :density - defaults to Grim::DENSITY
    #           :quality - defaults to Grim::QUALITY
    #
    # Returns true when the command exits with status 0.
    # Raises UnprocessablePage otherwise, with the command's combined
    # stdout/stderr output as the message.
    def save(pdf, index, path, options)
      width   = options.fetch(:width,   Grim::WIDTH)
      density = options.fetch(:density, Grim::DENSITY)
      quality = options.fetch(:quality, Grim::QUALITY)
      command = [@imagemagick_path, "-resize", width.to_s, "-antialias", "-render",
        "-quality", quality.to_s, "-colorspace", "RGB",
        "-interlace", "none", "-density", density.to_s,
        "#{pdf.path}[#{index}]", path]
      command_line = shell_join(command)

      if @ghostscript_path
        # Put the directory of the requested gs first on PATH for this one
        # command so convert picks that Ghostscript up.
        search_path  = "#{File.dirname(@ghostscript_path)}:#{ENV['PATH']}"
        command_line = "PATH=#{Shellwords.escape(search_path)} #{command_line}"
      end

      # The diagnostics of a failed conversion are written to stderr, so fold
      # stderr into the captured output to get a useful error message.
      result = `#{command_line} 2>&1`

      $?.success? || raise(UnprocessablePage, result)
    end

    private

    # Builds a single shell-safe command line from a list of raw arguments.
    def shell_join(arguments)
      arguments.map { |argument| Shellwords.escape(argument.to_s) }.join(' ')
    end
  end
end
@@ -0,0 +1,29 @@
1
module Grim
  # Processor that delegates to an ordered list of other processors and falls
  # back to the next one whenever the current one fails.
  class MultiProcessor
    # processors - Array of objects responding to #count(path) and
    #              #save(pdf, index, path, options), tried in the given order.
    def initialize(processors)
      @processors = processors
    end

    # Asks each processor in turn for the page count and stops at the first
    # one that reports a positive number. An empty String, nil or 0 is treated
    # as a failed attempt, since a processor whose command produced no usable
    # output ends up with one of those.
    #
    # path - String path of the PDF file.
    #
    # Returns the first successful count; when no processor succeeds, the
    # result of the last processor tried ("" when there are no processors).
    def count(path)
      result = ""
      @processors.each do |processor|
        result = processor.count(path)
        break if counted?(result)
      end
      result
    end

    # Asks each processor in turn to render the page and stops at the first
    # one that succeeds. A processor fails either by returning a falsy value or
    # by raising UnprocessablePage.
    #
    # pdf, index, path, options - passed through unchanged to the processors.
    #
    # Returns true as soon as one processor succeeds.
    # Raises UnprocessablePage when every processor fails (or none is
    # configured); the most recent error raised by a processor is re-raised so
    # its message is not lost.
    def save(pdf, index, path, options)
      last_error = nil
      @processors.each do |processor|
        begin
          return true if processor.save(pdf, index, path, options)
        rescue UnprocessablePage => error
          # Remember why this processor failed, then try the next one.
          last_error = error
        end
      end
      raise(last_error || UnprocessablePage)
    end

    private

    # True when a processor's count result is a usable (positive) page count.
    def counted?(result)
      result.respond_to?(:to_i) && result.to_i > 0
    end
  end
end
data/lib/grim/page.rb CHANGED
@@ -32,16 +32,7 @@ module Grim
32
32
  def save(path, options={})
33
33
  raise PathMissing if path.nil? || path !~ /\S/
34
34
 
35
- width = options.fetch(:width, Grim::WIDTH)
36
- density = options.fetch(:density, Grim::DENSITY)
37
- quality = options.fetch(:quality, Grim::QUALITY)
38
-
39
- output = SafeShell.execute("convert", "-resize", width, "-antialias", "-render",
40
- "-quality", quality, "-colorspace", "RGB",
41
- "-interlace", "none", "-density", density,
42
- "#{@pdf.path}[#{@index}]", path)
43
-
44
- $? == 0 || raise(UnprocessablePage, output)
35
+ Grim.processor.save(@pdf, @index, path, options)
45
36
  end
46
37
 
47
38
  # Extracts the text from the selected page.
@@ -54,7 +45,7 @@ module Grim
54
45
  # Returns a String.
55
46
  #
56
47
  def text
57
- SafeShell.execute("pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, @pdf.path, "-")
48
+ `#{["pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')}`
58
49
  end
59
50
  end
60
51
  end
data/lib/grim/pdf.rb CHANGED
@@ -5,9 +5,6 @@ module Grim
5
5
 
6
6
  attr_reader :path
7
7
 
8
- # ghostscript prints out a warning, this regex matches it
9
- WarningRegex = /\*\*\*\*.*\n/
10
-
11
8
  # Raises an error if pdf not found and sets some instance
12
9
  # variables if pdf is found.
13
10
  #
@@ -30,8 +27,7 @@ module Grim
30
27
  #
31
28
  def count
32
29
  @count ||= begin
33
- result = SafeShell.execute("gs", "-dNODISPLAY", "-q", "-sFile=#{@path}", File.expand_path('../../../lib/pdf_info.ps', __FILE__))
34
- result.gsub(WarningRegex, '').to_i
30
+ Grim.processor.count(@path)
35
31
  end
36
32
  end
37
33
 
data/lib/grim/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module Grim
3
- VERSION = "0.2.4" unless defined?(::Grim::VERSION)
3
+ VERSION = "0.3.0" unless defined?(::Grim::VERSION)
4
4
  end
@@ -0,0 +1,87 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
# Exercises Grim::ImageMagickProcessor directly against the smoker.pdf fixture.
describe Grim::ImageMagickProcessor do
  # Remember PATH before each example and put it back afterwards.
  before(:each) { @reset_to = ENV['PATH'] }
  after(:each)  { ENV['PATH'] = @reset_to }

  let(:fixture_pdf) { fixture_path("smoker.pdf") }
  let(:pdf)         { Grim::Pdf.new(fixture_pdf) }
  let(:processor)   { Grim::ImageMagickProcessor.new }

  describe "#count" do
    it "should return page count" do
      page_count = processor.count(fixture_pdf)
      page_count.should == 25
    end
  end

  describe "#save" do
    let(:png_path) { tmp_path("to_png_spec.png") }

    it "should create the file" do
      processor.save(pdf, 0, png_path, {})
      File.exist?(png_path).should be_true
    end

    it "should use default width of 1024" do
      processor.save(pdf, 0, png_path, {})
      actual_width, _height = dimensions_for_path(png_path)
      actual_width.should == 1024
    end
  end

  describe "#save with width option" do
    let(:png_path) { tmp_path("to_png_spec.png") }

    it "should set width" do
      processor.save(pdf, 0, png_path, {:width => 20})
      actual_width, _height = dimensions_for_path(png_path)
      actual_width.should == 20
    end
  end

  describe "#save with quality option" do
    let(:jpg_path) { tmp_path("to_png_spec.jpg") }

    it "should use quality" do
      # Render the same page at two quality settings and compare file sizes.
      sizes = [20, 90].map do |quality|
        processor.save(pdf, 0, jpg_path, {:quality => quality})
        File.size(jpg_path)
      end

      (sizes.first < sizes.last).should be_true
    end
  end

  describe "#save with density option" do
    let(:jpg_path) { tmp_path("to_png_spec.jpg") }

    it "should use density" do
      # Rendering at a higher density is expected to take longer.
      timings = [72, 300].map do |density|
        Benchmark.realtime { processor.save(pdf, 0, jpg_path, {:density => density}) }
      end

      (timings.first < timings.last).should be_true
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
# Checks that Grim::MultiProcessor walks its processors in order and stops at
# the first one that reports success.
describe Grim::MultiProcessor do
  let(:failure)   { Grim::ImageMagickProcessor.new }
  let(:success)   { Grim::ImageMagickProcessor.new }
  let(:extra)     { Grim::ImageMagickProcessor.new }
  let(:processor) { Grim::MultiProcessor.new([failure, success, extra]) }

  let(:path) { fixture_path("smoker.pdf") }
  let(:pdf)  { Grim::Pdf.new(path) }

  describe "#count" do
    it "should try processors until it succeeds" do
      failure.stub(:count).and_return("")
      success.should_receive(:count).and_return(30)
      extra.should_not_receive(:count)

      processor.count(path)
    end
  end

  describe "#save" do
    it "should try processors until it succeeds" do
      failure.stub(:save).and_return(false)
      success.should_receive(:save).and_return(true)
      extra.should_not_receive(:save)

      processor.save(pdf, 0, path, {})
    end

    it "should raise error if all processors fail" do
      # Every processor is consulted once and every one reports failure.
      [failure, success, extra].each do |candidate|
        candidate.should_receive(:save).and_return(false)
      end

      attempt = lambda { processor.save(pdf, 0, path, {}) }
      attempt.should raise_error(Grim::UnprocessablePage)
    end
  end
end
@@ -14,18 +14,12 @@ describe Grim::Page do
14
14
  describe "#save" do
15
15
  before(:all) do
16
16
  @path = tmp_path("to_png_spec.png")
17
- pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
18
-
19
- pdf[0].save(@path)
20
- end
21
-
22
- it "should create the file" do
23
- File.exist?(@path).should be_true
17
+ @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
24
18
  end
25
19
 
26
- it "should use default width of 1024" do
27
- width, height = dimensions_for_path(@path)
28
- width.should == 1024
20
+ it "should call Grim.processor.save with pdf, index, path, and options" do
21
+ Grim.processor.should_receive(:save).with(@pdf, 0, @path, {})
22
+ @pdf[0].save(@path)
29
23
  end
30
24
  end
31
25
 
@@ -41,59 +35,14 @@ describe Grim::Page do
41
35
  end
42
36
  end
43
37
 
44
- describe "#save with width option" do
45
- before(:each) do
46
- @path = tmp_path("to_png_spec.png")
47
- pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
48
-
49
- pdf[0].save(@path, :width => 20)
50
- end
51
-
52
- it "should set width" do
53
- width, height = dimensions_for_path(@path)
54
- width.should == 20
55
- end
56
- end
57
-
58
- describe "#save with quality option" do
59
- before(:each) do
60
- @path = tmp_path("to_png_spec.jpg")
61
- @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
62
- end
63
-
64
- it "should use quality" do
65
- @pdf[0].save(@path, :quality => 20)
66
- lower_size = File.size(@path)
67
-
68
- @pdf[0].save(@path, :quality => 90)
69
- higher_size = File.size(@path)
70
-
71
- (lower_size < higher_size).should be_true
72
- end
73
- end
74
-
75
- describe "#save with density option" do
76
- before(:each) do
77
- @path = tmp_path("to_png_spec.jpg")
78
- @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
79
- end
80
-
81
- it "should use density" do
82
- lower_time = Benchmark.realtime { @pdf[0].save(@path, :density => 20) }
83
- higher_time = Benchmark.realtime { @pdf[0].save(@path, :density => 300) }
84
-
85
- (lower_time < higher_time).should be_true
86
- end
87
- end
88
-
89
- describe "#save with an unprocessable PDF" do
90
- let(:path) { tmp_path("unprocessable.jpg") }
91
- let(:pdf) { Grim::Pdf.new(fixture_path("unprocessable.pdf")) }
92
-
93
- it "should raise an error" do
94
- lambda { pdf[0].save(path) }.should raise_error(Grim::UnprocessablePage, /missing an image filename/)
95
- end
96
- end
38
+ # describe "#save with an unprocessable PDF" do
39
+ # let(:path) { tmp_path("unprocessable.jpg") }
40
+ # let(:pdf) { Grim::Pdf.new(fixture_path("unprocessable.pdf")) }
41
+ #
42
+ # it "should raise an error" do
43
+ # lambda { pdf[0].save(path) }.should raise_error(Grim::UnprocessablePage, /missing an image filename/)
44
+ # end
45
+ # end
97
46
 
98
47
  describe "#text" do
99
48
  it "should return the text from the selected page" do
@@ -19,9 +19,10 @@ describe Grim::Pdf do
19
19
  end
20
20
 
21
21
  describe "#count" do
22
- it "should return 25" do
22
+ it "should call Grim.processor.count with pdf path" do
23
+ Grim.processor.should_receive(:count).with(fixture_path("smoker.pdf"))
23
24
  pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
24
- pdf.count.should == 25
25
+ pdf.count
25
26
  end
26
27
  end
27
28
 
@@ -2,6 +2,10 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Grim do
5
+ it "should have a default processor" do
6
+ Grim.processor.class.should == Grim::ImageMagickProcessor
7
+ end
8
+
5
9
  it "should have a VERSION constant" do
6
10
  Grim.const_defined?('VERSION').should be_true
7
11
  end
@@ -18,7 +22,7 @@ describe Grim do
18
22
  Grim::DENSITY.should == 300
19
23
  end
20
24
 
21
- describe "#new" do
25
+ describe "#reap" do
22
26
  it "should return an instance of Grim::Pdf" do
23
27
  Grim.reap(fixture_path("smoker.pdf")).class.should == Grim::Pdf
24
28
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grim
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- - 4
10
- version: 0.2.4
8
+ - 3
9
+ - 0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jonathan Hoyt
@@ -15,24 +15,9 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-27 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: safe_shell
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 23
29
- segments:
30
- - 1
31
- - 0
32
- - 0
33
- version: 1.0.0
34
- type: :runtime
35
- version_requirements: *id001
18
+ date: 2011-10-04 00:00:00 Z
19
+ dependencies: []
20
+
36
21
  description: Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
37
22
  email:
38
23
  - jonmagic@gmail.com
@@ -50,12 +35,16 @@ files:
50
35
  - Rakefile
51
36
  - grim.gemspec
52
37
  - lib/grim.rb
38
+ - lib/grim/image_magick_processor.rb
39
+ - lib/grim/multi_processor.rb
53
40
  - lib/grim/page.rb
54
41
  - lib/grim/pdf.rb
55
42
  - lib/grim/version.rb
56
43
  - lib/pdf_info.ps
57
44
  - spec/fixtures/smoker.pdf
58
45
  - spec/fixtures/unprocessable.pdf
46
+ - spec/lib/grim/image_magick_processor_spec.rb
47
+ - spec/lib/grim/multi_processor_spec.rb
59
48
  - spec/lib/grim/page_spec.rb
60
49
  - spec/lib/grim/pdf_spec.rb
61
50
  - spec/lib/grim_spec.rb
@@ -96,6 +85,8 @@ summary: Extract slides and text from a PDF.
96
85
  test_files:
97
86
  - spec/fixtures/smoker.pdf
98
87
  - spec/fixtures/unprocessable.pdf
88
+ - spec/lib/grim/image_magick_processor_spec.rb
89
+ - spec/lib/grim/multi_processor_spec.rb
99
90
  - spec/lib/grim/page_spec.rb
100
91
  - spec/lib/grim/pdf_spec.rb
101
92
  - spec/lib/grim_spec.rb