grim 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/grim/page.rb +8 -3
- data/lib/grim/pdf.rb +9 -4
- data/lib/grim/version.rb +1 -1
- data/spec/lib/grim/page_spec.rb +7 -1
- metadata +8 -10
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d9b3ea24639d459434c20b2f23e417e13b811a6a
|
4
|
+
data.tar.gz: d25c76a9a4ead4f3a3067eb40a364ff532460911
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cb6007819bb63ba6b07c253e978bcad0b792adcfac87f96ff1ea3b11fa4e94621384bb182e8413abc08efa37f984a70f6d3f1643a2d922e7f9147b42ae98a487
|
7
|
+
data.tar.gz: 64c20dc98e97cf1a341ef904d7c038feffe54acdd00e21c4d2f1dded2d5a756861e8e665d7910f9b94d0f457dd8734a0951110095c0b6f1b3116ea35d7866224
|
data/lib/grim/page.rb
CHANGED
@@ -8,11 +8,16 @@ module Grim
|
|
8
8
|
#
|
9
9
|
# pdf - the pdf this page belongs to
|
10
10
|
# index - the index of the page in the array of pages
|
11
|
+
# options - A Hash of options.
|
12
|
+
# :pdftotext_path - The String path of where to find the pdftotext
|
13
|
+
# binary to use when extracting text
|
14
|
+
# (default: "pdftotext").
|
11
15
|
#
|
12
|
-
def initialize(pdf, index)
|
16
|
+
def initialize(pdf, index, options = {})
|
13
17
|
@pdf = pdf
|
14
18
|
@index = index
|
15
19
|
@number = index + 1
|
20
|
+
@pdftotext_path = options[:pdftotext_path] || 'pdftotext'
|
16
21
|
end
|
17
22
|
|
18
23
|
# Extracts the selected page and turns it into an image.
|
@@ -45,7 +50,7 @@ module Grim
|
|
45
50
|
# Returns a String.
|
46
51
|
#
|
47
52
|
def text
|
48
|
-
`#{[
|
53
|
+
`#{[@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')}`
|
49
54
|
end
|
50
55
|
end
|
51
|
-
end
|
56
|
+
end
|
data/lib/grim/pdf.rb
CHANGED
@@ -9,10 +9,15 @@ module Grim
|
|
9
9
|
# variables if pdf is found.
|
10
10
|
#
|
11
11
|
# path - A String or Path to the pdf
|
12
|
+
# options - A Hash of options.
|
13
|
+
# :pdftotext_path - The String path of where to find the pdftotext
|
14
|
+
# binary to use when extracting text
|
15
|
+
# (default: "pdftotext").
|
12
16
|
#
|
13
|
-
def initialize(path)
|
17
|
+
def initialize(path, options = {})
|
14
18
|
raise Grim::PdfNotFound unless File.exists?(path)
|
15
19
|
@path = path
|
20
|
+
@pdftotext_path = options[:pdftotext_path] || 'pdftotext'
|
16
21
|
end
|
17
22
|
|
18
23
|
# Shells out to ghostscript to read the pdf with the pdf_info.ps script
|
@@ -43,14 +48,14 @@ module Grim
|
|
43
48
|
#
|
44
49
|
def [](index)
|
45
50
|
raise Grim::PageNotFound unless index >= 0 && index < count
|
46
|
-
Grim::Page.new(self, index)
|
51
|
+
Grim::Page.new(self, index, pdftotext_path: @pdftotext_path)
|
47
52
|
end
|
48
53
|
|
49
54
|
def each
|
50
55
|
(0..(count-1)).each do |index|
|
51
|
-
yield Grim::Page.new(self, index)
|
56
|
+
yield Grim::Page.new(self, index, pdftotext_path: @pdftotext_path)
|
52
57
|
end
|
53
58
|
end
|
54
59
|
|
55
60
|
end
|
56
|
-
end
|
61
|
+
end
|
data/lib/grim/version.rb
CHANGED
data/spec/lib/grim/page_spec.rb
CHANGED
@@ -47,7 +47,13 @@ describe Grim::Page do
|
|
47
47
|
describe "#text" do
|
48
48
|
it "should return the text from the selected page" do
|
49
49
|
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"))
|
50
|
-
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\
|
50
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
51
|
+
end
|
52
|
+
|
53
|
+
it "works with full path to pdftotext" do
|
54
|
+
pdftotext_path = `which pdftotext`.chomp
|
55
|
+
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
|
56
|
+
pdf[1].text.should == "Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f"
|
51
57
|
end
|
52
58
|
end
|
53
59
|
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jonathan Hoyt
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-11-25 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: Grim is a simple gem for extracting a page from a pdf and converting
|
15
14
|
it to an image as well as extract the text from the page as a string. It basically
|
@@ -21,7 +20,7 @@ executables: []
|
|
21
20
|
extensions: []
|
22
21
|
extra_rdoc_files: []
|
23
22
|
files:
|
24
|
-
- .gitignore
|
23
|
+
- ".gitignore"
|
25
24
|
- Gemfile
|
26
25
|
- LICENSE
|
27
26
|
- README.textile
|
@@ -44,27 +43,26 @@ files:
|
|
44
43
|
- spec/spec_helper.rb
|
45
44
|
homepage: http://github.com/jonmagic/grim
|
46
45
|
licenses: []
|
46
|
+
metadata: {}
|
47
47
|
post_install_message:
|
48
48
|
rdoc_options: []
|
49
49
|
require_paths:
|
50
50
|
- lib
|
51
51
|
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
none: false
|
53
52
|
requirements:
|
54
|
-
- -
|
53
|
+
- - ">="
|
55
54
|
- !ruby/object:Gem::Version
|
56
55
|
version: '0'
|
57
56
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
57
|
requirements:
|
60
|
-
- -
|
58
|
+
- - ">="
|
61
59
|
- !ruby/object:Gem::Version
|
62
60
|
version: '0'
|
63
61
|
requirements: []
|
64
62
|
rubyforge_project: grim
|
65
|
-
rubygems_version:
|
63
|
+
rubygems_version: 2.2.2
|
66
64
|
signing_key:
|
67
|
-
specification_version:
|
65
|
+
specification_version: 4
|
68
66
|
summary: Extract slides and text from a PDF.
|
69
67
|
test_files:
|
70
68
|
- spec/fixtures/smoker.pdf
|