pdf-reader 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +6 -2
- data/Rakefile +10 -18
- data/examples/callbacks.rb +2 -1
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader.rb +1 -1
- data/lib/pdf/reader/abstract_strategy.rb +1 -1
- data/lib/pdf/reader/buffer.rb +6 -3
- data/lib/pdf/reader/cmap.rb +2 -2
- data/lib/pdf/reader/encoding.rb +2 -2
- data/lib/pdf/reader/error.rb +3 -3
- data/lib/pdf/reader/font.rb +8 -6
- data/lib/pdf/reader/glyph_hash.rb +5 -5
- data/lib/pdf/reader/object_hash.rb +2 -2
- data/lib/pdf/reader/page.rb +1 -1
- data/lib/pdf/reader/page_state.rb +0 -1
- data/lib/pdf/reader/page_text_receiver.rb +1 -1
- data/lib/pdf/reader/pages_strategy.rb +3 -3
- data/lib/pdf/reader/parser.rb +4 -4
- data/lib/pdf/reader/reference.rb +1 -1
- data/lib/pdf/reader/register_receiver.rb +0 -1
- data/lib/pdf/reader/stream.rb +1 -1
- data/lib/pdf/reader/text_receiver.rb +20 -20
- data/lib/pdf/reader/token.rb +1 -1
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/xref.rb +2 -2
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb8a5be7c95212f559bb4d26af5fbdb484d21e77
|
4
|
+
data.tar.gz: f8fe70bf868dfff03b47a0b81993d1e680593e84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b881cecddfa41e3ad15dcafd31d4109290c664d0cf06478f3af6769aa7ced108e3ba082db54c6759c117d7559cc118e0d3a971c17b59cb23bf4e50024089fa6b
|
7
|
+
data.tar.gz: 50d61b135d79840dce5e5ca712b5db5185deefeee5de13d2adc63c1a8e1eb4b383bb0e8bb491c03bea49d11c4edf130b0fdb3b2eafea63ee0b85ca0390e047a0
|
data/CHANGELOG
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
+
v1.4.1 (2nd January 2017)
|
2
|
+
- improve compatibility with ruby 2.4 (thanks Akira Matsuda)
|
3
|
+
- various bug fixes
|
4
|
+
|
1
5
|
v1.4.0 (22nd February 2016)
|
2
6
|
- raise minimum ruby version to 1.9.3
|
3
7
|
- print warnings to stderr when deprecated methods are used. These methods have been
|
4
8
|
deprecated for 4 years, so hopefully few people are depending on them
|
5
|
-
- Fix exception when a
|
6
|
-
built-in
|
9
|
+
- Fix exception when a non-breaking space (character 160) is used with a
|
10
|
+
built-in font (helvetica, etc)
|
7
11
|
- various bug fixes
|
8
12
|
|
9
13
|
v1.3.3 (7th April 2013)
|
data/Rakefile
CHANGED
@@ -4,27 +4,19 @@ require "rdoc/task"
|
|
4
4
|
require "rspec/core/rake_task"
|
5
5
|
require "yaml"
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
desc "Default Task"
|
8
|
+
task :default => [ :quality, :spec ]
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
require 'cane/rake_task'
|
11
|
+
require 'morecane'
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
desc "Run cane to check quality metrics"
|
14
|
+
Cane::RakeTask.new(:quality) do |cane|
|
15
|
+
cane.abc_max = 20
|
16
|
+
cane.style_measure = 100
|
17
|
+
cane.max_violations = 93
|
15
18
|
|
16
|
-
|
17
|
-
Cane::RakeTask.new(:quality) do |cane|
|
18
|
-
cane.abc_max = 20
|
19
|
-
cane.style_measure = 100
|
20
|
-
cane.max_violations = 93
|
21
|
-
|
22
|
-
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
23
|
-
end
|
24
|
-
|
25
|
-
else
|
26
|
-
desc "Default Task"
|
27
|
-
task :default => [ :spec ]
|
19
|
+
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
28
20
|
end
|
29
21
|
|
30
22
|
desc "Run all rspec files"
|
data/examples/callbacks.rb
CHANGED
@@ -9,12 +9,13 @@
|
|
9
9
|
require 'rubygems'
|
10
10
|
require 'pdf/reader'
|
11
11
|
|
12
|
-
receiver = PDF::Reader::RegisterReceiver.new
|
13
12
|
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-basic.pdf"
|
14
13
|
|
15
14
|
PDF::Reader.open(filename) do |reader|
|
16
15
|
reader.pages.each do |page|
|
16
|
+
receiver = PDF::Reader::RegisterReceiver.new
|
17
17
|
page.walk(receiver)
|
18
|
+
|
18
19
|
receiver.callbacks.each do |cb|
|
19
20
|
puts cb
|
20
21
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
# Extract an (imperfect) array of paragraphs divided somewhat
|
5
|
+
# arbitrarily on line length.
|
6
|
+
|
7
|
+
require 'pdf/reader'
|
8
|
+
|
9
|
+
reader = PDF::Reader.new('somefile.pdf')
|
10
|
+
|
11
|
+
paragraph = ""
|
12
|
+
paragraphs = []
|
13
|
+
reader.pages.each do |page|
|
14
|
+
lines = page.text.scan(/^.+/)
|
15
|
+
lines.each do |line|
|
16
|
+
if line.length > 55
|
17
|
+
paragraph += " #{line}"
|
18
|
+
else
|
19
|
+
paragraph += " #{line}"
|
20
|
+
paragraphs << paragraph
|
21
|
+
paragraph = ""
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/pdf/reader.rb
CHANGED
@@ -285,7 +285,7 @@ module PDF
|
|
285
285
|
#
|
286
286
|
# Given an IO object that contains PDF data, return the contents of a single object
|
287
287
|
#
|
288
|
-
def object
|
288
|
+
def object(io, id, gen)
|
289
289
|
msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
|
290
290
|
$stderr.puts(msg)
|
291
291
|
@objects = ObjectHash.new(io)
|
@@ -23,7 +23,7 @@ class PDF::Reader
|
|
23
23
|
|
24
24
|
# calls the name callback method on the receiver class with params as the arguments
|
25
25
|
#
|
26
|
-
def callback
|
26
|
+
def callback(name, params=[])
|
27
27
|
@receivers.each do |receiver|
|
28
28
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
29
29
|
end
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -45,6 +45,7 @@ class PDF::Reader
|
|
45
45
|
STREAM = "stream"
|
46
46
|
ID = "ID"
|
47
47
|
FWD_SLASH = "/"
|
48
|
+
NULL_BYTE = "\x00"
|
48
49
|
|
49
50
|
attr_reader :pos
|
50
51
|
|
@@ -60,7 +61,7 @@ class PDF::Reader
|
|
60
61
|
# :content_stream - set to true if buffer will be tokenising a
|
61
62
|
# content stream. Defaults to false
|
62
63
|
#
|
63
|
-
def initialize
|
64
|
+
def initialize(io, opts = {})
|
64
65
|
@io = io
|
65
66
|
@tokens = []
|
66
67
|
@in_content_stream = opts[:content_stream]
|
@@ -227,7 +228,7 @@ class PDF::Reader
|
|
227
228
|
|
228
229
|
buffer = []
|
229
230
|
|
230
|
-
until buffer[0] =~ /\s/ && buffer[1, 2] == ["E", "I"]
|
231
|
+
until buffer[0] =~ /\s|\0/ && buffer[1, 2] == ["E", "I"]
|
231
232
|
chr = @io.read(1)
|
232
233
|
buffer << chr
|
233
234
|
|
@@ -236,7 +237,9 @@ class PDF::Reader
|
|
236
237
|
end
|
237
238
|
end
|
238
239
|
|
239
|
-
|
240
|
+
str << NULL_BYTE if buffer.first == NULL_BYTE
|
241
|
+
|
242
|
+
@tokens << string_token(str)
|
240
243
|
@io.seek(-3, IO::SEEK_CUR) unless chr.nil?
|
241
244
|
end
|
242
245
|
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -68,11 +68,11 @@ class PDF::Reader
|
|
68
68
|
|
69
69
|
# Convert a glyph code into one or more Codepoints.
|
70
70
|
#
|
71
|
-
# Returns an array of
|
71
|
+
# Returns an array of Integers.
|
72
72
|
#
|
73
73
|
def decode(c)
|
74
74
|
# TODO: implement the conversion
|
75
|
-
return c unless
|
75
|
+
return c unless Integer === c
|
76
76
|
@map[c]
|
77
77
|
end
|
78
78
|
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -112,7 +112,7 @@ class PDF::Reader
|
|
112
112
|
# convert an integer glyph code into an Adobe glyph name.
|
113
113
|
#
|
114
114
|
# int_to_name(65)
|
115
|
-
# => :A
|
115
|
+
# => [:A]
|
116
116
|
#
|
117
117
|
def int_to_name(glyph_code)
|
118
118
|
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
@@ -210,7 +210,7 @@ class PDF::Reader
|
|
210
210
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
211
211
|
File.open(file, mode) do |f|
|
212
212
|
f.each do |l|
|
213
|
-
|
213
|
+
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
214
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
215
215
|
end
|
216
216
|
end
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -29,19 +29,19 @@ class PDF::Reader
|
|
29
29
|
# are valid
|
30
30
|
class Error # :nodoc:
|
31
31
|
################################################################################
|
32
|
-
def self.str_assert
|
32
|
+
def self.str_assert(lvalue, rvalue, chars=nil)
|
33
33
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
34
34
|
lvalue = lvalue[0,chars] if chars
|
35
35
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
36
36
|
end
|
37
37
|
################################################################################
|
38
|
-
def self.str_assert_not
|
38
|
+
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
39
39
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
40
40
|
lvalue = lvalue[0,chars] if chars
|
41
41
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
42
42
|
end
|
43
43
|
################################################################################
|
44
|
-
def self.assert_equal
|
44
|
+
def self.assert_equal(lvalue, rvalue)
|
45
45
|
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
46
46
|
end
|
47
47
|
################################################################################
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -161,15 +161,16 @@ class PDF::Reader
|
|
161
161
|
end
|
162
162
|
|
163
163
|
def to_utf8_via_cmap(params)
|
164
|
-
|
164
|
+
case params
|
165
|
+
when Integer
|
165
166
|
[
|
166
167
|
@tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
|
167
168
|
].flatten.pack("U*")
|
168
|
-
|
169
|
+
when String
|
169
170
|
params.unpack(encoding.unpack).map { |c|
|
170
171
|
@tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
|
171
172
|
}.flatten.pack("U*")
|
172
|
-
|
173
|
+
when Array
|
173
174
|
params.collect { |param| to_utf8_via_cmap(param) }
|
174
175
|
else
|
175
176
|
params
|
@@ -181,11 +182,12 @@ class PDF::Reader
|
|
181
182
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
|
182
183
|
end
|
183
184
|
|
184
|
-
|
185
|
+
case params
|
186
|
+
when Integer
|
185
187
|
encoding.int_to_utf8_string(params)
|
186
|
-
|
188
|
+
when String
|
187
189
|
encoding.to_utf8(params)
|
188
|
-
|
190
|
+
when Array
|
189
191
|
params.collect { |param| to_utf8_via_encoding(param) }
|
190
192
|
else
|
191
193
|
params
|
@@ -81,16 +81,16 @@ class PDF::Reader
|
|
81
81
|
# h = GlyphHash.new
|
82
82
|
#
|
83
83
|
# h.unicode_to_name(65)
|
84
|
-
# => :A
|
84
|
+
# => [:A]
|
85
85
|
#
|
86
86
|
# h.unicode_to_name(8364)
|
87
|
-
# => :Euro
|
87
|
+
# => [:Euro]
|
88
88
|
#
|
89
89
|
# h.unicode_to_name(34)
|
90
|
-
# => :34
|
90
|
+
# => [:34]
|
91
91
|
#
|
92
92
|
def unicode_to_name(codepoint)
|
93
|
-
@by_codepoint[codepoint.to_i]
|
93
|
+
@by_codepoint[codepoint.to_i] || []
|
94
94
|
end
|
95
95
|
|
96
96
|
private
|
@@ -105,7 +105,7 @@ class PDF::Reader
|
|
105
105
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
106
106
|
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
107
107
|
f.each do |l|
|
108
|
-
|
108
|
+
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
109
|
if name && code
|
110
110
|
cp = "0x#{code}".hex
|
111
111
|
keyed_by_name[name.to_sym] = cp
|
@@ -78,7 +78,7 @@ class PDF::Reader
|
|
78
78
|
|
79
79
|
if @cache.has_key?(key)
|
80
80
|
@cache[key]
|
81
|
-
elsif xref[key].is_a?(
|
81
|
+
elsif xref[key].is_a?(Integer)
|
82
82
|
buf = new_buffer(xref[key])
|
83
83
|
@cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
84
84
|
elsif xref[key].is_a?(PDF::Reader::Reference)
|
@@ -323,7 +323,7 @@ class PDF::Reader
|
|
323
323
|
|
324
324
|
def read_version
|
325
325
|
@io.seek(0)
|
326
|
-
|
326
|
+
_m, version = *@io.read(10).match(/PDF-(\d.\d)/)
|
327
327
|
@io.seek(0)
|
328
328
|
version.to_f
|
329
329
|
end
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -155,7 +155,7 @@ module PDF
|
|
155
155
|
|
156
156
|
# calls the name callback method on each receiver object with params as the arguments
|
157
157
|
#
|
158
|
-
def callback
|
158
|
+
def callback(receivers, name, params=[])
|
159
159
|
receivers.each do |receiver|
|
160
160
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
161
161
|
end
|
@@ -327,7 +327,6 @@ class PDF::Reader
|
|
327
327
|
glyph_width = ((w0 - (tj/1000.0)) * fs) * th
|
328
328
|
tx = glyph_width + ((tc + tw) * th)
|
329
329
|
end
|
330
|
-
ty = 0
|
331
330
|
|
332
331
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
333
332
|
# ctm[0] here, but this gets my tests green and I'm out of
|
@@ -282,7 +282,7 @@ class PDF::Reader
|
|
282
282
|
################################################################################
|
283
283
|
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
284
284
|
# its content
|
285
|
-
def walk_pages
|
285
|
+
def walk_pages(page)
|
286
286
|
|
287
287
|
# extract page content
|
288
288
|
if page[:Type] == :Pages
|
@@ -351,7 +351,7 @@ class PDF::Reader
|
|
351
351
|
# Reads a PDF content stream and calls all the appropriate callback methods for the operators
|
352
352
|
# it contains
|
353
353
|
#
|
354
|
-
def content_stream
|
354
|
+
def content_stream(instructions, fonts = {})
|
355
355
|
instructions = [instructions] unless instructions.kind_of?(Array)
|
356
356
|
instructions = instructions.map { |ins|
|
357
357
|
ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
|
@@ -399,7 +399,7 @@ class PDF::Reader
|
|
399
399
|
params << token
|
400
400
|
end
|
401
401
|
end
|
402
|
-
rescue EOFError
|
402
|
+
rescue EOFError
|
403
403
|
raise MalformedPDFError, "End Of File while processing a content stream"
|
404
404
|
end
|
405
405
|
################################################################################
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -60,7 +60,7 @@ class PDF::Reader
|
|
60
60
|
#
|
61
61
|
# buffer - a PDF::Reader::Buffer object that contains PDF data
|
62
62
|
# objects - a PDF::Reader::ObjectHash object that can return objects from the PDF file
|
63
|
-
def initialize
|
63
|
+
def initialize(buffer, objects=nil)
|
64
64
|
@buffer = buffer
|
65
65
|
@objects = objects
|
66
66
|
end
|
@@ -69,7 +69,7 @@ class PDF::Reader
|
|
69
69
|
# object
|
70
70
|
#
|
71
71
|
# operators - a hash of supported operators to read from the underlying buffer.
|
72
|
-
def parse_token
|
72
|
+
def parse_token(operators={})
|
73
73
|
token = @buffer.token
|
74
74
|
|
75
75
|
if STRATEGIES.has_key? token
|
@@ -93,7 +93,7 @@ class PDF::Reader
|
|
93
93
|
#
|
94
94
|
# id - the object ID to return
|
95
95
|
# gen - the object revision number to return
|
96
|
-
def object
|
96
|
+
def object(id, gen)
|
97
97
|
Error.assert_equal(parse_token, id)
|
98
98
|
Error.assert_equal(parse_token, gen)
|
99
99
|
Error.str_assert(parse_token, "obj")
|
@@ -198,7 +198,7 @@ class PDF::Reader
|
|
198
198
|
|
199
199
|
################################################################################
|
200
200
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
201
|
-
def stream
|
201
|
+
def stream(dict)
|
202
202
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
203
203
|
if @objects
|
204
204
|
length = @objects.deref(dict[:Length])
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -32,7 +32,7 @@ class PDF::Reader
|
|
32
32
|
attr_reader :id, :gen
|
33
33
|
################################################################################
|
34
34
|
# Create a new Reference to an object with the specified id and revision number
|
35
|
-
def initialize
|
35
|
+
def initialize(id, gen)
|
36
36
|
@id, @gen = id, gen
|
37
37
|
end
|
38
38
|
################################################################################
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -37,7 +37,7 @@ class PDF::Reader
|
|
37
37
|
################################################################################
|
38
38
|
# Creates a new stream with the specified dictionary and data. The dictionary
|
39
39
|
# should be a standard ruby hash, the data should be a standard ruby string.
|
40
|
-
def initialize
|
40
|
+
def initialize(hash, data)
|
41
41
|
@hash = hash
|
42
42
|
@data = data
|
43
43
|
@udata = nil
|
@@ -39,13 +39,13 @@ class PDF::Reader
|
|
39
39
|
class TextReceiver
|
40
40
|
################################################################################
|
41
41
|
# Initialize with the library user's receiver
|
42
|
-
def initialize
|
42
|
+
def initialize(main_receiver)
|
43
43
|
@main_receiver = main_receiver
|
44
44
|
@upper_corners = []
|
45
45
|
end
|
46
46
|
################################################################################
|
47
47
|
# Called when the document parsing begins
|
48
|
-
def begin_document
|
48
|
+
def begin_document(root)
|
49
49
|
@upper_corners = []
|
50
50
|
end
|
51
51
|
################################################################################
|
@@ -54,7 +54,7 @@ class PDF::Reader
|
|
54
54
|
@state.clear
|
55
55
|
end
|
56
56
|
################################################################################
|
57
|
-
def begin_page_container
|
57
|
+
def begin_page_container(page)
|
58
58
|
@upper_corners.push(media_box_check(page))
|
59
59
|
end
|
60
60
|
################################################################################
|
@@ -63,7 +63,7 @@ class PDF::Reader
|
|
63
63
|
end
|
64
64
|
################################################################################
|
65
65
|
# Called when new page parsing begins
|
66
|
-
def begin_page
|
66
|
+
def begin_page(info)
|
67
67
|
@page = info
|
68
68
|
|
69
69
|
@state = [{
|
@@ -101,29 +101,29 @@ class PDF::Reader
|
|
101
101
|
end
|
102
102
|
################################################################################
|
103
103
|
# PDF operator Tm
|
104
|
-
def set_text_matrix_and_text_line_matrix
|
104
|
+
def set_text_matrix_and_text_line_matrix(*args)
|
105
105
|
# these variable names look bad, but they're from the PDF spec
|
106
|
-
|
106
|
+
_a, _b, _c, _d, _e, f = *args
|
107
107
|
calculate_line_and_location(f)
|
108
108
|
end
|
109
109
|
################################################################################
|
110
110
|
# PDF operator Tc
|
111
|
-
def set_character_spacing
|
111
|
+
def set_character_spacing(n)
|
112
112
|
@state.last[:char_spacing] = n
|
113
113
|
end
|
114
114
|
################################################################################
|
115
115
|
# PDF operator Tw
|
116
|
-
def set_word_spacing
|
116
|
+
def set_word_spacing(n)
|
117
117
|
@state.last[:word_spacing] = n
|
118
118
|
end
|
119
119
|
################################################################################
|
120
120
|
# PDF operator Tz
|
121
|
-
def set_horizontal_text_scaling
|
121
|
+
def set_horizontal_text_scaling(n)
|
122
122
|
@state.last[:hori_scaling] = n/100
|
123
123
|
end
|
124
124
|
################################################################################
|
125
125
|
# PDF operator TL
|
126
|
-
def set_text_leading
|
126
|
+
def set_text_leading(n)
|
127
127
|
@state.last[:leading] = n
|
128
128
|
end
|
129
129
|
################################################################################
|
@@ -133,19 +133,19 @@ class PDF::Reader
|
|
133
133
|
end
|
134
134
|
################################################################################
|
135
135
|
# PDF operator Td
|
136
|
-
def move_text_position
|
136
|
+
def move_text_position(tx, ty)
|
137
137
|
#puts "#{tx} #{ty} Td"
|
138
138
|
calculate_line_and_location(@location + ty)
|
139
139
|
end
|
140
140
|
################################################################################
|
141
141
|
# PDF operator TD
|
142
|
-
def move_text_position_and_set_leading
|
142
|
+
def move_text_position_and_set_leading(tx, ty)
|
143
143
|
set_text_leading(ty)# * -1)
|
144
144
|
move_text_position(tx, ty)
|
145
145
|
end
|
146
146
|
################################################################################
|
147
147
|
# PDF operator Tj
|
148
|
-
def show_text
|
148
|
+
def show_text(string)
|
149
149
|
#puts "getting line #@line"
|
150
150
|
|
151
151
|
place = (@output[@line] ||= "")
|
@@ -157,7 +157,7 @@ class PDF::Reader
|
|
157
157
|
#puts "place is now: #{place}"
|
158
158
|
@written_to = true
|
159
159
|
end
|
160
|
-
def super_show_text
|
160
|
+
def super_show_text(string)
|
161
161
|
urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
|
162
162
|
ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR
|
163
163
|
|
@@ -193,12 +193,12 @@ class PDF::Reader
|
|
193
193
|
end
|
194
194
|
################################################################################
|
195
195
|
# PDF operator TJ
|
196
|
-
def show_text_with_positioning
|
196
|
+
def show_text_with_positioning(params)
|
197
197
|
prev_adjustment = @state.last[:tj_adjustment]
|
198
198
|
|
199
199
|
params.each do |p|
|
200
200
|
case p
|
201
|
-
when Float,
|
201
|
+
when Float, Integer
|
202
202
|
@state.last[:tj_adjustment] = p
|
203
203
|
else
|
204
204
|
show_text(p)
|
@@ -209,19 +209,19 @@ class PDF::Reader
|
|
209
209
|
end
|
210
210
|
################################################################################
|
211
211
|
# PDF operator '
|
212
|
-
def move_to_next_line_and_show_text
|
212
|
+
def move_to_next_line_and_show_text(string)
|
213
213
|
move_to_start_of_next_line
|
214
214
|
show_text(string)
|
215
215
|
end
|
216
216
|
################################################################################
|
217
217
|
# PDF operator "
|
218
|
-
def set_spacing_next_line_show_text
|
218
|
+
def set_spacing_next_line_show_text(aw, ac, string)
|
219
219
|
set_word_spacing(aw)
|
220
220
|
set_character_spacing(ac)
|
221
221
|
move_to_next_line_and_show_text(string)
|
222
222
|
end
|
223
223
|
################################################################################
|
224
|
-
def media_box_check
|
224
|
+
def media_box_check(dict)
|
225
225
|
corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup
|
226
226
|
|
227
227
|
if dict.has_key?(:MediaBox)
|
@@ -233,7 +233,7 @@ class PDF::Reader
|
|
233
233
|
corners
|
234
234
|
end
|
235
235
|
################################################################################
|
236
|
-
def calculate_line_and_location
|
236
|
+
def calculate_line_and_location(new_loc)
|
237
237
|
##puts "calculate_line_and_location(#{new_loc})"
|
238
238
|
key = new_loc; key.freeze
|
239
239
|
|
data/lib/pdf/reader/token.rb
CHANGED
@@ -33,7 +33,7 @@ class PDF::Reader
|
|
33
33
|
class Token < String # :nodoc:
|
34
34
|
################################################################################
|
35
35
|
# Creates a new token with the specified value
|
36
|
-
def initialize
|
36
|
+
def initialize(val)
|
37
37
|
super
|
38
38
|
end
|
39
39
|
################################################################################
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -53,7 +53,7 @@ class PDF::Reader
|
|
53
53
|
#
|
54
54
|
# io - must be an IO object, generally either a file or a StringIO
|
55
55
|
#
|
56
|
-
def initialize
|
56
|
+
def initialize(io)
|
57
57
|
@io = io
|
58
58
|
@junk_offset = calc_junk_offset(io) || 0
|
59
59
|
@xref = {}
|
@@ -219,7 +219,7 @@ class PDF::Reader
|
|
219
219
|
################################################################################
|
220
220
|
# Stores an offset value for a particular PDF object ID and revision number
|
221
221
|
#
|
222
|
-
def store
|
222
|
+
def store(id, gen, offset)
|
223
223
|
(@xref[id] ||= {})[gen] ||= offset
|
224
224
|
end
|
225
225
|
################################################################################
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '3.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '3.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: morecane
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -207,6 +207,7 @@ files:
|
|
207
207
|
- examples/extract_bates.rb
|
208
208
|
- examples/extract_fonts.rb
|
209
209
|
- examples/extract_images.rb
|
210
|
+
- examples/fuzzy_paragraphs.rb
|
210
211
|
- examples/hash.rb
|
211
212
|
- examples/metadata.rb
|
212
213
|
- examples/page_count.rb
|
@@ -325,7 +326,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
325
326
|
version: '0'
|
326
327
|
requirements: []
|
327
328
|
rubyforge_project:
|
328
|
-
rubygems_version: 2.5.
|
329
|
+
rubygems_version: 2.5.2
|
329
330
|
signing_key:
|
330
331
|
specification_version: 4
|
331
332
|
summary: A library for accessing the content of PDF files
|