grim 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile CHANGED
@@ -48,6 +48,21 @@ h2. Usage
48
48
  end
49
49
  </pre></code>
50
50
 
51
+ We also support using other processors (the default is whatever version of ImageMagick/Ghostscript is in your PATH).
52
+
53
+ <pre><code>
54
+ # specifying one processor with specific ImageMagick and Ghostscript paths
55
+ Grim.processor = Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/convert", :ghostscript_path => "/path/to/gs"})
56
+
57
+ # multiple processors with fallback if first fails, useful if you need multiple versions of convert/gs
58
+ Grim.processor = Grim::MultiProcessor.new([
59
+ Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/6.7/convert", :ghostscript_path => "/path/to/9.04/gs"}),
60
+ Grim::ImageMagickProcessor.new({:imagemagick_path => "/path/to/6.6/convert", :ghostscript_path => "/path/to/9.02/gs"})
61
+ ])
62
+
63
+ pdf = Grim.reap('/path/to/pdf')
64
+ </code></pre>
65
+
51
66
  h2. License
52
67
 
53
68
  See LICENSE for details.
data/grim.gemspec CHANGED
@@ -12,7 +12,6 @@ Gem::Specification.new do |s|
12
12
  s.description = %q{Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.}
13
13
 
14
14
  s.rubyforge_project = "grim"
15
- s.add_dependency 'safe_shell', '~> 1.0.0'
16
15
 
17
16
  s.files = `git ls-files`.split("\n")
18
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
data/lib/grim.rb CHANGED
@@ -1,7 +1,10 @@
1
1
  # encoding: UTF-8
2
- require 'safe_shell'
2
+ require 'shellwords'
3
3
 
4
4
  module Grim
5
+ extend self
6
+ attr_accessor :processor
7
+
5
8
  # Default resize output width, any positive integer
6
9
  WIDTH = 1024
7
10
 
@@ -47,4 +50,8 @@ module Grim
47
50
  end
48
51
 
49
52
  require 'grim/pdf'
50
- require 'grim/page'
53
+ require 'grim/page'
54
+ require 'grim/image_magick_processor'
55
+ require 'grim/multi_processor'
56
+
57
+ Grim.processor = Grim::ImageMagickProcessor.new
@@ -0,0 +1,37 @@
1
+ module Grim
2
+ class ImageMagickProcessor
3
+
4
+ # ghostscript prints out a warning, this regex matches it
5
+ WarningRegex = /\*\*\*\*.*\n/
6
+
7
+ def initialize(options={})
8
+ @imagemagick_path = options[:imagemagick_path] || 'convert'
9
+ @ghostscript_path = options[:ghostscript_path]
10
+ @original_path = ENV['PATH']
11
+ end
12
+
13
+ def count(path)
14
+ command = ["-dNODISPLAY", "-q",
15
+ "-sFile=#{Shellwords.shellescape(path)}",
16
+ File.expand_path('../../../lib/pdf_info.ps', __FILE__)]
17
+ @ghostscript_path ? command.unshift(@ghostscript_path) : command.unshift('gs')
18
+ result = `#{command.join(' ')}`
19
+ result.gsub(WarningRegex, '').to_i
20
+ end
21
+
22
+ def save(pdf, index, path, options)
23
+ width = options.fetch(:width, Grim::WIDTH)
24
+ density = options.fetch(:density, Grim::DENSITY)
25
+ quality = options.fetch(:quality, Grim::QUALITY)
26
+ command = [@imagemagick_path, "-resize", width.to_s, "-antialias", "-render",
27
+ "-quality", quality.to_s, "-colorspace", "RGB",
28
+ "-interlace", "none", "-density", density.to_s,
29
+ "#{Shellwords.shellescape(pdf.path)}[#{index}]", path]
30
+ command.unshift("PATH=#{File.dirname(@ghostscript_path)}:#{ENV['PATH']}") if @ghostscript_path
31
+
32
+ result = `#{command.join(' ')}`
33
+
34
+ $? == 0 || raise(UnprocessablePage, result)
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,29 @@
1
+ module Grim
2
+ class MultiProcessor
3
+ def initialize(processors)
4
+ @processors = processors
5
+ end
6
+
7
+ def count(path)
8
+ result = ""
9
+ @processors.each do |processor|
10
+ result = processor.count(path)
11
+ break if result != ""
12
+ end
13
+ result
14
+ end
15
+
16
+ def save(pdf, index, path, options)
17
+ result = true
18
+ @processors.each do |processor|
19
+ begin
20
+ result = processor.save(pdf, index, path, options)
21
+ rescue UnprocessablePage
22
+ next
23
+ end
24
+ break if result
25
+ end
26
+ raise UnprocessablePage unless result
27
+ end
28
+ end
29
+ end
data/lib/grim/page.rb CHANGED
@@ -32,16 +32,7 @@ module Grim
32
32
  def save(path, options={})
33
33
  raise PathMissing if path.nil? || path !~ /\S/
34
34
 
35
- width = options.fetch(:width, Grim::WIDTH)
36
- density = options.fetch(:density, Grim::DENSITY)
37
- quality = options.fetch(:quality, Grim::QUALITY)
38
-
39
- output = SafeShell.execute("convert", "-resize", width, "-antialias", "-render",
40
- "-quality", quality, "-colorspace", "RGB",
41
- "-interlace", "none", "-density", density,
42
- "#{@pdf.path}[#{@index}]", path)
43
-
44
- $? == 0 || raise(UnprocessablePage, output)
35
+ Grim.processor.save(@pdf, @index, path, options)
45
36
  end
46
37
 
47
38
  # Extracts the text from the selected page.
@@ -54,7 +45,7 @@ module Grim
54
45
  # Returns a String.
55
46
  #
56
47
  def text
57
- SafeShell.execute("pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, @pdf.path, "-")
48
+ `#{["pdftotext", "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')}`
58
49
  end
59
50
  end
60
51
  end
data/lib/grim/pdf.rb CHANGED
@@ -5,9 +5,6 @@ module Grim
5
5
 
6
6
  attr_reader :path
7
7
 
8
- # ghostscript prints out a warning, this regex matches it
9
- WarningRegex = /\*\*\*\*.*\n/
10
-
11
8
  # Raises an error if pdf not found and sets some instance
12
9
  # variables if pdf is found.
13
10
  #
@@ -30,8 +27,7 @@ module Grim
30
27
  #
31
28
  def count
32
29
  @count ||= begin
33
- result = SafeShell.execute("gs", "-dNODISPLAY", "-q", "-sFile=#{@path}", File.expand_path('../../../lib/pdf_info.ps', __FILE__))
34
- result.gsub(WarningRegex, '').to_i
30
+ Grim.processor.count(@path)
35
31
  end
36
32
  end
37
33
 
data/lib/grim/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module Grim
3
- VERSION = "0.2.4" unless defined?(::Grim::VERSION)
3
+ VERSION = "0.3.0" unless defined?(::Grim::VERSION)
4
4
  end
@@ -0,0 +1,87 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Grim::ImageMagickProcessor do
5
+ before(:each) do
6
+ @reset_to = ENV['PATH']
7
+ end
8
+
9
+ after(:each) do
10
+ ENV['PATH'] = @reset_to
11
+ end
12
+
13
+ describe "#count" do
14
+ before(:each) do
15
+ @processor = Grim::ImageMagickProcessor.new
16
+ end
17
+
18
+ it "should return page count" do
19
+ @processor.count(fixture_path("smoker.pdf")).should == 25
20
+ end
21
+ end
22
+
23
+ describe "#save" do
24
+ before(:all) do
25
+ @path = tmp_path("to_png_spec.png")
26
+ @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
27
+
28
+ @processor = Grim::ImageMagickProcessor.new
29
+ end
30
+
31
+ it "should create the file" do
32
+ @processor.save(@pdf, 0, @path, {})
33
+ File.exist?(@path).should be_true
34
+ end
35
+
36
+ it "should use default width of 1024" do
37
+ @processor.save(@pdf, 0, @path, {})
38
+ width, height = dimensions_for_path(@path)
39
+ width.should == 1024
40
+ end
41
+ end
42
+
43
+ describe "#save with width option" do
44
+ before(:each) do
45
+ @path = tmp_path("to_png_spec.png")
46
+ pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
47
+
48
+ Grim::ImageMagickProcessor.new.save(pdf, 0, @path, {:width => 20})
49
+ end
50
+
51
+ it "should set width" do
52
+ width, height = dimensions_for_path(@path)
53
+ width.should == 20
54
+ end
55
+ end
56
+
57
+ describe "#save with quality option" do
58
+ before(:each) do
59
+ @path = tmp_path("to_png_spec.jpg")
60
+ @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
61
+ end
62
+
63
+ it "should use quality" do
64
+ Grim::ImageMagickProcessor.new.save(@pdf, 0, @path, {:quality => 20})
65
+ lower_size = File.size(@path)
66
+
67
+ Grim::ImageMagickProcessor.new.save(@pdf, 0, @path, {:quality => 90})
68
+ higher_size = File.size(@path)
69
+
70
+ (lower_size < higher_size).should be_true
71
+ end
72
+ end
73
+
74
+ describe "#save with density option" do
75
+ before(:each) do
76
+ @path = tmp_path("to_png_spec.jpg")
77
+ @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
78
+ end
79
+
80
+ it "should use density" do
81
+ lower_time = Benchmark.realtime { Grim::ImageMagickProcessor.new.save(@pdf, 0, @path, {:density => 72}) }
82
+ higher_time = Benchmark.realtime { Grim::ImageMagickProcessor.new.save(@pdf, 0, @path, {:density => 300}) }
83
+
84
+ (lower_time < higher_time).should be_true
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,42 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Grim::MultiProcessor do
5
+ before(:each) do
6
+ @failure = Grim::ImageMagickProcessor.new
7
+ @success = Grim::ImageMagickProcessor.new
8
+ @extra = Grim::ImageMagickProcessor.new
9
+ @processor = Grim::MultiProcessor.new([@failure, @success, @extra])
10
+
11
+ @path = fixture_path("smoker.pdf")
12
+ @pdf = Grim::Pdf.new(@path)
13
+ end
14
+
15
+ describe "#count" do
16
+ it "should try processors until it succeeds" do
17
+ @failure.stub(:count){""}
18
+ @success.should_receive(:count).and_return(30)
19
+ @extra.should_not_receive(:count)
20
+
21
+ @processor.count(@path)
22
+ end
23
+ end
24
+
25
+ describe "#save" do
26
+ it "should try processors until it succeeds" do
27
+ @failure.stub(:save){false}
28
+ @success.should_receive(:save).and_return(true)
29
+ @extra.should_not_receive(:save)
30
+
31
+ @processor.save(@pdf, 0, @path, {})
32
+ end
33
+
34
+ it "should raise error if all processors fail" do
35
+ @failure.should_receive(:save).and_return(false)
36
+ @success.should_receive(:save).and_return(false)
37
+ @extra.should_receive(:save).and_return(false)
38
+
39
+ lambda { @processor.save(@pdf, 0, @path, {}) }.should raise_error(Grim::UnprocessablePage)
40
+ end
41
+ end
42
+ end
@@ -14,18 +14,12 @@ describe Grim::Page do
14
14
  describe "#save" do
15
15
  before(:all) do
16
16
  @path = tmp_path("to_png_spec.png")
17
- pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
18
-
19
- pdf[0].save(@path)
20
- end
21
-
22
- it "should create the file" do
23
- File.exist?(@path).should be_true
17
+ @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
24
18
  end
25
19
 
26
- it "should use default width of 1024" do
27
- width, height = dimensions_for_path(@path)
28
- width.should == 1024
20
+ it "should call Grim.processor.save with pdf, index, path, and options" do
21
+ Grim.processor.should_receive(:save).with(@pdf, 0, @path, {})
22
+ @pdf[0].save(@path)
29
23
  end
30
24
  end
31
25
 
@@ -41,59 +35,14 @@ describe Grim::Page do
41
35
  end
42
36
  end
43
37
 
44
- describe "#save with width option" do
45
- before(:each) do
46
- @path = tmp_path("to_png_spec.png")
47
- pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
48
-
49
- pdf[0].save(@path, :width => 20)
50
- end
51
-
52
- it "should set width" do
53
- width, height = dimensions_for_path(@path)
54
- width.should == 20
55
- end
56
- end
57
-
58
- describe "#save with quality option" do
59
- before(:each) do
60
- @path = tmp_path("to_png_spec.jpg")
61
- @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
62
- end
63
-
64
- it "should use quality" do
65
- @pdf[0].save(@path, :quality => 20)
66
- lower_size = File.size(@path)
67
-
68
- @pdf[0].save(@path, :quality => 90)
69
- higher_size = File.size(@path)
70
-
71
- (lower_size < higher_size).should be_true
72
- end
73
- end
74
-
75
- describe "#save with density option" do
76
- before(:each) do
77
- @path = tmp_path("to_png_spec.jpg")
78
- @pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
79
- end
80
-
81
- it "should use density" do
82
- lower_time = Benchmark.realtime { @pdf[0].save(@path, :density => 20) }
83
- higher_time = Benchmark.realtime { @pdf[0].save(@path, :density => 300) }
84
-
85
- (lower_time < higher_time).should be_true
86
- end
87
- end
88
-
89
- describe "#save with an unprocessable PDF" do
90
- let(:path) { tmp_path("unprocessable.jpg") }
91
- let(:pdf) { Grim::Pdf.new(fixture_path("unprocessable.pdf")) }
92
-
93
- it "should raise an error" do
94
- lambda { pdf[0].save(path) }.should raise_error(Grim::UnprocessablePage, /missing an image filename/)
95
- end
96
- end
38
+ # describe "#save with an unprocessable PDF" do
39
+ # let(:path) { tmp_path("unprocessable.jpg") }
40
+ # let(:pdf) { Grim::Pdf.new(fixture_path("unprocessable.pdf")) }
41
+ #
42
+ # it "should raise an error" do
43
+ # lambda { pdf[0].save(path) }.should raise_error(Grim::UnprocessablePage, /missing an image filename/)
44
+ # end
45
+ # end
97
46
 
98
47
  describe "#text" do
99
48
  it "should return the text from the selected page" do
@@ -19,9 +19,10 @@ describe Grim::Pdf do
19
19
  end
20
20
 
21
21
  describe "#count" do
22
- it "should return 25" do
22
+ it "should call Grim.processor.count with pdf path" do
23
+ Grim.processor.should_receive(:count).with(fixture_path("smoker.pdf"))
23
24
  pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
24
- pdf.count.should == 25
25
+ pdf.count
25
26
  end
26
27
  end
27
28
 
@@ -2,6 +2,10 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Grim do
5
+ it "should have a default processor" do
6
+ Grim.processor.class.should == Grim::ImageMagickProcessor
7
+ end
8
+
5
9
  it "should have a VERSION constant" do
6
10
  Grim.const_defined?('VERSION').should be_true
7
11
  end
@@ -18,7 +22,7 @@ describe Grim do
18
22
  Grim::DENSITY.should == 300
19
23
  end
20
24
 
21
- describe "#new" do
25
+ describe "#reap" do
22
26
  it "should return an instance of Grim::Pdf" do
23
27
  Grim.reap(fixture_path("smoker.pdf")).class.should == Grim::Pdf
24
28
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grim
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- - 4
10
- version: 0.2.4
8
+ - 3
9
+ - 0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jonathan Hoyt
@@ -15,24 +15,9 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-27 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: safe_shell
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ~>
27
- - !ruby/object:Gem::Version
28
- hash: 23
29
- segments:
30
- - 1
31
- - 0
32
- - 0
33
- version: 1.0.0
34
- type: :runtime
35
- version_requirements: *id001
18
+ date: 2011-10-04 00:00:00 Z
19
+ dependencies: []
20
+
36
21
  description: Grim is a simple gem for extracting a page from a pdf and converting it to an image as well as extract the text from the page as a string. It basically gives you an easy to use api to ghostscript, imagemagick, and pdftotext specific to this use case.
37
22
  email:
38
23
  - jonmagic@gmail.com
@@ -50,12 +35,16 @@ files:
50
35
  - Rakefile
51
36
  - grim.gemspec
52
37
  - lib/grim.rb
38
+ - lib/grim/image_magick_processor.rb
39
+ - lib/grim/multi_processor.rb
53
40
  - lib/grim/page.rb
54
41
  - lib/grim/pdf.rb
55
42
  - lib/grim/version.rb
56
43
  - lib/pdf_info.ps
57
44
  - spec/fixtures/smoker.pdf
58
45
  - spec/fixtures/unprocessable.pdf
46
+ - spec/lib/grim/image_magick_processor_spec.rb
47
+ - spec/lib/grim/multi_processor_spec.rb
59
48
  - spec/lib/grim/page_spec.rb
60
49
  - spec/lib/grim/pdf_spec.rb
61
50
  - spec/lib/grim_spec.rb
@@ -96,6 +85,8 @@ summary: Extract slides and text from a PDF.
96
85
  test_files:
97
86
  - spec/fixtures/smoker.pdf
98
87
  - spec/fixtures/unprocessable.pdf
88
+ - spec/lib/grim/image_magick_processor_spec.rb
89
+ - spec/lib/grim/multi_processor_spec.rb
99
90
  - spec/lib/grim/page_spec.rb
100
91
  - spec/lib/grim/pdf_spec.rb
101
92
  - spec/lib/grim_spec.rb