grim 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/grim/page.rb +8 -3
- data/lib/grim/pdf.rb +9 -4
- data/lib/grim/version.rb +1 -1
- data/spec/lib/grim/page_spec.rb +7 -1
- metadata +8 -10
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d9b3ea24639d459434c20b2f23e417e13b811a6a
|
4
|
+
data.tar.gz: d25c76a9a4ead4f3a3067eb40a364ff532460911
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cb6007819bb63ba6b07c253e978bcad0b792adcfac87f96ff1ea3b11fa4e94621384bb182e8413abc08efa37f984a70f6d3f1643a2d922e7f9147b42ae98a487
|
7
|
+
data.tar.gz: 64c20dc98e97cf1a341ef904d7c038feffe54acdd00e21c4d2f1dded2d5a756861e8e665d7910f9b94d0f457dd8734a0951110095c0b6f1b3116ea35d7866224
|
data/lib/grim/page.rb
CHANGED
@@ -8,11 +8,16 @@ module Grim
|
|
8
8
|
#
|
9
9
|
# pdf - the pdf this page belongs to
|
10
10
|
# index - the index of the page in the array of pages
|
11
|
+
# options - A Hash of options.
|
12
|
+
# :pdftotext_path - The String path of where to find the pdftotext
|
13
|
+
# binary to use when extracting text
|
14
|
+
# (default: "pdftotext").
|
11
15
|
#
|
12
|
-
def initialize(pdf, index)
|
16
|
+
def initialize(pdf, index, options = {})
|
13
17
|
@pdf = pdf
|
14
18
|
@index = index
|
15
19
|
@number = index + 1
|
20
|
+
@pdftotext_path = options[:pdftotext_path] || 'pdftotext'
|
16
21
|
end
|
17
22
|
|
18
23
|
# Extracts the selected page and turns it into an image.
|
@@ -45,7 +50,7 @@ module Grim
|
|
45
50
|
# Returns a String.
|
46
51
|
#
|
47
52
|
def text
|
48
|
-
`#{[
|
53
|
+
`#{[@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')}`
|
49
54
|
end
|
50
55
|
end
|
51
|
-
end
|
56
|
+
end
|
data/lib/grim/pdf.rb
CHANGED
@@ -9,10 +9,15 @@ module Grim
|
|
9
9
|
# variables if pdf is found.
|
10
10
|
#
|
11
11
|
# path - A String or Path to the pdf
|
12
|
+
# options - A Hash of options.
|
13
|
+
# :pdftotext_path - The String path of where to find the pdftotext
|
14
|
+
# binary to use when extracting text
|
15
|
+
# (default: "pdftotext").
|
12
16
|
#
|
13
|
-
def initialize(path)
|
17
|
+
def initialize(path, options = {})
|
14
18
|
raise Grim::PdfNotFound unless File.exists?(path)
|
15
19
|
@path = path
|
20
|
+
@pdftotext_path = options[:pdftotext_path] || 'pdftotext'
|
16
21
|
end
|
17
22
|
|
18
23
|
# Shells out to ghostscript to read the pdf with the pdf_info.ps script
|
@@ -43,14 +48,14 @@ module Grim
|
|
43
48
|
#
|
44
49
|
def [](index)
|
45
50
|
raise Grim::PageNotFound unless index >= 0 && index < count
|
46
|
-
Grim::Page.new(self, index)
|
51
|
+
Grim::Page.new(self, index, pdftotext_path: @pdftotext_path)
|
47
52
|
end
|
48
53
|
|
49
54
|
def each
|
50
55
|
(0..(count-1)).each do |index|
|
51
|
-
yield Grim::Page.new(self, index)
|
56
|
+
yield Grim::Page.new(self, index, pdftotext_path: @pdftotext_path)
|
52
57
|
end
|
53
58
|
end
|
54
59
|
|
55
60
|
end
|
56
|
-
end
|
61
|
+
end
|
data/lib/grim/version.rb
CHANGED
data/spec/lib/grim/page_spec.rb
CHANGED
@@ -47,7 +47,13 @@ describe Grim::Page do
|
|
47
47
|
describe "#text" do
|
48
48
|
it "should return the text from the selected page" do
|
49
49
|
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
50
|
-
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\
|
50
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
51
|
+
end
|
52
|
+
|
53
|
+
it "works with full path to pdftotext" do
|
54
|
+
pdftotext_path = `which pdftotext`.chomp
|
55
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
|
56
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
51
57
|
end
|
52
58
|
end
|
53
59
|
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jonathan Hoyt
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-11-25 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: Grim is a simple gem for extracting a page from a pdf and converting
|
15
14
|
it to an image as well as extract the text from the page as a string. It basically
|
@@ -21,7 +20,7 @@ executables: []
|
|
21
20
|
extensions: []
|
22
21
|
extra_rdoc_files: []
|
23
22
|
files:
|
24
|
-
- .gitignore
|
23
|
+
- ".gitignore"
|
25
24
|
- Gemfile
|
26
25
|
- LICENSE
|
27
26
|
- README.textile
|
@@ -44,27 +43,26 @@ files:
|
|
44
43
|
- spec/spec_helper.rb
|
45
44
|
homepage: http://github.com/jonmagic/grim
|
46
45
|
licenses: []
|
46
|
+
metadata: {}
|
47
47
|
post_install_message:
|
48
48
|
rdoc_options: []
|
49
49
|
require_paths:
|
50
50
|
- lib
|
51
51
|
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
none: false
|
53
52
|
requirements:
|
54
|
-
- -
|
53
|
+
- - ">="
|
55
54
|
- !ruby/object:Gem::Version
|
56
55
|
version: '0'
|
57
56
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
57
|
requirements:
|
60
|
-
- -
|
58
|
+
- - ">="
|
61
59
|
- !ruby/object:Gem::Version
|
62
60
|
version: '0'
|
63
61
|
requirements: []
|
64
62
|
rubyforge_project: grim
|
65
|
-
rubygems_version:
|
63
|
+
rubygems_version: 2.2.2
|
66
64
|
signing_key:
|
67
|
-
specification_version:
|
65
|
+
specification_version: 4
|
68
66
|
summary: Extract slides and text from a PDF.
|
69
67
|
test_files:
|
70
68
|
- spec/fixtures/smoker.pdf
|