grim 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/grim/page.rb +8 -3
- data/lib/grim/version.rb +1 -1
- data/spec/fixtures/table.pdf +0 -0
- data/spec/lib/grim/page_spec.rb +8 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30464651a26c1f6e432aff72789ab8f1f2af0ce1
|
4
|
+
data.tar.gz: cc290f2afdafe2e16001a8ed3e05a3c3730a1c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4aa1e307eb53b753676a77dc2d4b15880764c49b0c4b647062ed8f54e0757b1df24915c71a2aaceb28013a12250da7db283d3195a843c0a69d69b7cfb757913
|
7
|
+
data.tar.gz: 79d39dd5645978619f81f056e3c089e60193bc2358d6e37d8aaff6d49e4f33d23c091ffb4602e7e014ba244acb9199e3aed095db20ac9b7bce7e68cc58ade64c
|
data/README.md
CHANGED
data/lib/grim/page.rb
CHANGED
@@ -40,17 +40,22 @@ module Grim
|
|
40
40
|
Grim.processor.save(@pdf, @index, path, options)
|
41
41
|
end
|
42
42
|
|
43
|
-
# Extracts the text from the selected page.
|
43
|
+
# Extracts the text from the selected page, using additional options.
|
44
44
|
#
|
45
45
|
# For example:
|
46
46
|
#
|
47
47
|
# pdf[1].text
|
48
48
|
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
|
49
49
|
#
|
50
|
+
# pdf[1].text({flags: ["-table"]})
|
50
51
|
# Returns a String.
|
51
52
|
#
|
52
|
-
def text
|
53
|
-
|
53
|
+
def text(options={})
|
54
|
+
flags = options.fetch(:flags, [])
|
55
|
+
command_parts = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number]
|
56
|
+
command_parts += flags if flags.length > 0
|
57
|
+
command_parts += [Shellwords.escape(@pdf.path), "-"]
|
58
|
+
command = command_parts.join(' ')
|
54
59
|
Grim.logger.debug { "Running pdftotext command" }
|
55
60
|
Grim.logger.debug { command }
|
56
61
|
`#{command}`
|
data/lib/grim/version.rb
CHANGED
Binary file
|
data/spec/lib/grim/page_spec.rb
CHANGED
@@ -51,6 +51,14 @@ describe Grim::Page do
|
|
51
51
|
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
|
52
52
|
end
|
53
53
|
|
54
|
+
it "should extract tabular data with the -table option" do
|
55
|
+
pdf = Grim::Pdf.new(fixture_path("table.pdf"))
|
56
|
+
expect(pdf[0].text({flags: ["-table"]})).to \
|
57
|
+
include(
|
58
|
+
" Male 979 (85) 968 (85)\n\n" +
|
59
|
+
" Female 169 (15) 169 (15)\n")
|
60
|
+
end
|
61
|
+
|
54
62
|
it "works with full path to pdftotext" do
|
55
63
|
pdftotext_path = `which pdftotext`.chomp
|
56
64
|
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Hoyt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Grim is a simple gem for extracting a page from a pdf and converting
|
14
14
|
it to an image as well as extract the text from the page as a string. It basically
|
@@ -36,6 +36,7 @@ files:
|
|
36
36
|
- lib/pdf_info.ps
|
37
37
|
- spec/fixtures/remove_alpha.pdf
|
38
38
|
- spec/fixtures/smoker.pdf
|
39
|
+
- spec/fixtures/table.pdf
|
39
40
|
- spec/fixtures/unprocessable.pdf
|
40
41
|
- spec/lib/grim/image_magick_processor_spec.rb
|
41
42
|
- spec/lib/grim/multi_processor_spec.rb
|
@@ -63,13 +64,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
64
|
version: '0'
|
64
65
|
requirements: []
|
65
66
|
rubyforge_project: grim
|
66
|
-
rubygems_version: 2.
|
67
|
+
rubygems_version: 2.6.11
|
67
68
|
signing_key:
|
68
69
|
specification_version: 4
|
69
70
|
summary: Extract slides and text from a PDF.
|
70
71
|
test_files:
|
71
72
|
- spec/fixtures/remove_alpha.pdf
|
72
73
|
- spec/fixtures/smoker.pdf
|
74
|
+
- spec/fixtures/table.pdf
|
73
75
|
- spec/fixtures/unprocessable.pdf
|
74
76
|
- spec/lib/grim/image_magick_processor_spec.rb
|
75
77
|
- spec/lib/grim/multi_processor_spec.rb
|