grim 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/grim/page.rb +8 -3
- data/lib/grim/version.rb +1 -1
- data/spec/fixtures/table.pdf +0 -0
- data/spec/lib/grim/page_spec.rb +8 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30464651a26c1f6e432aff72789ab8f1f2af0ce1
|
4
|
+
data.tar.gz: cc290f2afdafe2e16001a8ed3e05a3c3730a1c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4aa1e307eb53b753676a77dc2d4b15880764c49b0c4b647062ed8f54e0757b1df24915c71a2aaceb28013a12250da7db283d3195a843c0a69d69b7cfb757913
|
7
|
+
data.tar.gz: 79d39dd5645978619f81f056e3c089e60193bc2358d6e37d8aaff6d49e4f33d23c091ffb4602e7e014ba244acb9199e3aed095db20ac9b7bce7e68cc58ade64c
|
data/README.md
CHANGED
data/lib/grim/page.rb
CHANGED
@@ -40,17 +40,22 @@ module Grim
|
|
40
40
|
Grim.processor.save(@pdf, @index, path, options)
|
41
41
|
end
|
42
42
|
|
43
|
-
# Extracts the text from the selected page.
|
43
|
+
# Extracts the text from the selected page, using additional options.
|
44
44
|
#
|
45
45
|
# For example:
|
46
46
|
#
|
47
47
|
# pdf[1].text
|
48
48
|
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
49
49
|
#
|
50
|
+
# pdf[1].text({flags: ["-table"]})
|
50
51
|
# Returns a String.
|
51
52
|
#
|
52
|
-
def text
|
53
|
-
|
53
|
+
def text(options={})
|
54
|
+
flags = options.fetch(:flags, [])
|
55
|
+
command_parts = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number]
|
56
|
+
command_parts += flags if flags.length > 0
|
57
|
+
command_parts += [Shellwords.escape(@pdf.path), "-"]
|
58
|
+
command = command_parts.join(' ')
|
54
59
|
Grim.logger.debug { "Running pdftotext command" }
|
55
60
|
Grim.logger.debug { command }
|
56
61
|
`#{command}`
|
data/lib/grim/version.rb
CHANGED
Binary file
|
data/spec/lib/grim/page_spec.rb
CHANGED
@@ -51,6 +51,14 @@ describe Grim::Page do
|
|
51
51
|
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
|
52
52
|
end
|
53
53
|
|
54
|
+
it "should extract tabular data with the -table option" do
|
55
|
+
pdf = Grim::Pdf.new(fixture_path("table.pdf"))
|
56
|
+
expect(pdf[0].text({flags: ["-table"]})).to \
|
57
|
+
include(
|
58
|
+
" Male 979 (85) 968 (85)\n\n" +
|
59
|
+
" Female 169 (15) 169 (15)\n")
|
60
|
+
end
|
61
|
+
|
54
62
|
it "works with full path to pdftotext" do
|
55
63
|
pdftotext_path = `which pdftotext`.chomp
|
56
64
|
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hoyt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Grim is a simple gem for extracting a page from a pdf and converting
|
14
14
|
it to an image as well as extract the text from the page as a string. It basically
|
@@ -36,6 +36,7 @@ files:
|
|
36
36
|
- lib/pdf_info.ps
|
37
37
|
- spec/fixtures/remove_alpha.pdf
|
38
38
|
- spec/fixtures/smoker.pdf
|
39
|
+
- spec/fixtures/table.pdf
|
39
40
|
- spec/fixtures/unprocessable.pdf
|
40
41
|
- spec/lib/grim/image_magick_processor_spec.rb
|
41
42
|
- spec/lib/grim/multi_processor_spec.rb
|
@@ -63,13 +64,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
64
|
version: '0'
|
64
65
|
requirements: []
|
65
66
|
rubyforge_project: grim
|
66
|
-
rubygems_version: 2.
|
67
|
+
rubygems_version: 2.6.11
|
67
68
|
signing_key:
|
68
69
|
specification_version: 4
|
69
70
|
summary: Extract slides and text from a PDF.
|
70
71
|
test_files:
|
71
72
|
- spec/fixtures/remove_alpha.pdf
|
72
73
|
- spec/fixtures/smoker.pdf
|
74
|
+
- spec/fixtures/table.pdf
|
73
75
|
- spec/fixtures/unprocessable.pdf
|
74
76
|
- spec/lib/grim/image_magick_processor_spec.rb
|
75
77
|
- spec/lib/grim/multi_processor_spec.rb
|