pdf-reader 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +6 -2
- data/Rakefile +10 -18
- data/examples/callbacks.rb +2 -1
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader.rb +1 -1
- data/lib/pdf/reader/abstract_strategy.rb +1 -1
- data/lib/pdf/reader/buffer.rb +6 -3
- data/lib/pdf/reader/cmap.rb +2 -2
- data/lib/pdf/reader/encoding.rb +2 -2
- data/lib/pdf/reader/error.rb +3 -3
- data/lib/pdf/reader/font.rb +8 -6
- data/lib/pdf/reader/glyph_hash.rb +5 -5
- data/lib/pdf/reader/object_hash.rb +2 -2
- data/lib/pdf/reader/page.rb +1 -1
- data/lib/pdf/reader/page_state.rb +0 -1
- data/lib/pdf/reader/page_text_receiver.rb +1 -1
- data/lib/pdf/reader/pages_strategy.rb +3 -3
- data/lib/pdf/reader/parser.rb +4 -4
- data/lib/pdf/reader/reference.rb +1 -1
- data/lib/pdf/reader/register_receiver.rb +0 -1
- data/lib/pdf/reader/stream.rb +1 -1
- data/lib/pdf/reader/text_receiver.rb +20 -20
- data/lib/pdf/reader/token.rb +1 -1
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/xref.rb +2 -2
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb8a5be7c95212f559bb4d26af5fbdb484d21e77
|
4
|
+
data.tar.gz: f8fe70bf868dfff03b47a0b81993d1e680593e84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b881cecddfa41e3ad15dcafd31d4109290c664d0cf06478f3af6769aa7ced108e3ba082db54c6759c117d7559cc118e0d3a971c17b59cb23bf4e50024089fa6b
|
7
|
+
data.tar.gz: 50d61b135d79840dce5e5ca712b5db5185deefeee5de13d2adc63c1a8e1eb4b383bb0e8bb491c03bea49d11c4edf130b0fdb3b2eafea63ee0b85ca0390e047a0
|
data/CHANGELOG
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
+
v1.4.1 (2nd January 2017)
|
2
|
+
- improve compatibility with ruby 2.4 (thanks Akira Matsuda)
|
3
|
+
- various bug fixes
|
4
|
+
|
1
5
|
v1.4.0 (22nd February 2016)
|
2
6
|
- raise minimum ruby version to 1.9.3
|
3
7
|
- print warnings to stderr when deprecated methods are used. These methods have been
|
4
8
|
deprecated for 4 years, so hopefully few people are depending on them
|
5
|
-
- Fix exception when a
|
6
|
-
built-in
|
9
|
+
- Fix exception when a non-breaking space (character 160) is used with a
|
10
|
+
built-in font (helvetica, etc)
|
7
11
|
- various bug fixes
|
8
12
|
|
9
13
|
v1.3.3 (7th April 2013)
|
data/Rakefile
CHANGED
@@ -4,27 +4,19 @@ require "rdoc/task"
|
|
4
4
|
require "rspec/core/rake_task"
|
5
5
|
require "yaml"
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
desc "Default Task"
|
8
|
+
task :default => [ :quality, :spec ]
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
require 'cane/rake_task'
|
11
|
+
require 'morecane'
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
desc "Run cane to check quality metrics"
|
14
|
+
Cane::RakeTask.new(:quality) do |cane|
|
15
|
+
cane.abc_max = 20
|
16
|
+
cane.style_measure = 100
|
17
|
+
cane.max_violations = 93
|
15
18
|
|
16
|
-
|
17
|
-
Cane::RakeTask.new(:quality) do |cane|
|
18
|
-
cane.abc_max = 20
|
19
|
-
cane.style_measure = 100
|
20
|
-
cane.max_violations = 93
|
21
|
-
|
22
|
-
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
23
|
-
end
|
24
|
-
|
25
|
-
else
|
26
|
-
desc "Default Task"
|
27
|
-
task :default => [ :spec ]
|
19
|
+
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
28
20
|
end
|
29
21
|
|
30
22
|
desc "Run all rspec files"
|
data/examples/callbacks.rb
CHANGED
@@ -9,12 +9,13 @@
|
|
9
9
|
require 'rubygems'
|
10
10
|
require 'pdf/reader'
|
11
11
|
|
12
|
-
receiver = PDF::Reader::RegisterReceiver.new
|
13
12
|
filename = File.expand_path(File.dirname(__FILE__)) + "/../spec/data/cairo-basic.pdf"
|
14
13
|
|
15
14
|
PDF::Reader.open(filename) do |reader|
|
16
15
|
reader.pages.each do |page|
|
16
|
+
receiver = PDF::Reader::RegisterReceiver.new
|
17
17
|
page.walk(receiver)
|
18
|
+
|
18
19
|
receiver.callbacks.each do |cb|
|
19
20
|
puts cb
|
20
21
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
# Extract an (imperfect) array of paragraphs divided somewhat
|
5
|
+
# arbitrarily on line length.
|
6
|
+
|
7
|
+
require 'pdf/reader'
|
8
|
+
|
9
|
+
reader = PDF::Reader.new('somefile.pdf')
|
10
|
+
|
11
|
+
paragraph = ""
|
12
|
+
paragraphs = []
|
13
|
+
reader.pages.each do |page|
|
14
|
+
lines = page.text.scan(/^.+/)
|
15
|
+
lines.each do |line|
|
16
|
+
if line.length > 55
|
17
|
+
paragraph += " #{line}"
|
18
|
+
else
|
19
|
+
paragraph += " #{line}"
|
20
|
+
paragraphs << paragraph
|
21
|
+
paragraph = ""
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/pdf/reader.rb
CHANGED
@@ -285,7 +285,7 @@ module PDF
|
|
285
285
|
#
|
286
286
|
# Given an IO object that contains PDF data, return the contents of a single object
|
287
287
|
#
|
288
|
-
def object
|
288
|
+
def object(io, id, gen)
|
289
289
|
msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
|
290
290
|
$stderr.puts(msg)
|
291
291
|
@objects = ObjectHash.new(io)
|
@@ -23,7 +23,7 @@ class PDF::Reader
|
|
23
23
|
|
24
24
|
# calls the name callback method on the receiver class with params as the arguments
|
25
25
|
#
|
26
|
-
def callback
|
26
|
+
def callback(name, params=[])
|
27
27
|
@receivers.each do |receiver|
|
28
28
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
29
29
|
end
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -45,6 +45,7 @@ class PDF::Reader
|
|
45
45
|
STREAM = "stream"
|
46
46
|
ID = "ID"
|
47
47
|
FWD_SLASH = "/"
|
48
|
+
NULL_BYTE = "\x00"
|
48
49
|
|
49
50
|
attr_reader :pos
|
50
51
|
|
@@ -60,7 +61,7 @@ class PDF::Reader
|
|
60
61
|
# :content_stream - set to true if buffer will be tokenising a
|
61
62
|
# content stream. Defaults to false
|
62
63
|
#
|
63
|
-
def initialize
|
64
|
+
def initialize(io, opts = {})
|
64
65
|
@io = io
|
65
66
|
@tokens = []
|
66
67
|
@in_content_stream = opts[:content_stream]
|
@@ -227,7 +228,7 @@ class PDF::Reader
|
|
227
228
|
|
228
229
|
buffer = []
|
229
230
|
|
230
|
-
until buffer[0] =~ /\s/ && buffer[1, 2] == ["E", "I"]
|
231
|
+
until buffer[0] =~ /\s|\0/ && buffer[1, 2] == ["E", "I"]
|
231
232
|
chr = @io.read(1)
|
232
233
|
buffer << chr
|
233
234
|
|
@@ -236,7 +237,9 @@ class PDF::Reader
|
|
236
237
|
end
|
237
238
|
end
|
238
239
|
|
239
|
-
|
240
|
+
str << NULL_BYTE if buffer.first == NULL_BYTE
|
241
|
+
|
242
|
+
@tokens << string_token(str)
|
240
243
|
@io.seek(-3, IO::SEEK_CUR) unless chr.nil?
|
241
244
|
end
|
242
245
|
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -68,11 +68,11 @@ class PDF::Reader
|
|
68
68
|
|
69
69
|
# Convert a glyph code into one or more Codepoints.
|
70
70
|
#
|
71
|
-
# Returns an array of
|
71
|
+
# Returns an array of Integers.
|
72
72
|
#
|
73
73
|
def decode(c)
|
74
74
|
# TODO: implement the conversion
|
75
|
-
return c unless
|
75
|
+
return c unless Integer === c
|
76
76
|
@map[c]
|
77
77
|
end
|
78
78
|
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -112,7 +112,7 @@ class PDF::Reader
|
|
112
112
|
# convert an integer glyph code into an Adobe glyph name.
|
113
113
|
#
|
114
114
|
# int_to_name(65)
|
115
|
-
# => :A
|
115
|
+
# => [:A]
|
116
116
|
#
|
117
117
|
def int_to_name(glyph_code)
|
118
118
|
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
@@ -210,7 +210,7 @@ class PDF::Reader
|
|
210
210
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
211
211
|
File.open(file, mode) do |f|
|
212
212
|
f.each do |l|
|
213
|
-
|
213
|
+
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
214
214
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
215
215
|
end
|
216
216
|
end
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -29,19 +29,19 @@ class PDF::Reader
|
|
29
29
|
# are valid
|
30
30
|
class Error # :nodoc:
|
31
31
|
################################################################################
|
32
|
-
def self.str_assert
|
32
|
+
def self.str_assert(lvalue, rvalue, chars=nil)
|
33
33
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
34
34
|
lvalue = lvalue[0,chars] if chars
|
35
35
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
36
36
|
end
|
37
37
|
################################################################################
|
38
|
-
def self.str_assert_not
|
38
|
+
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
39
39
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
40
40
|
lvalue = lvalue[0,chars] if chars
|
41
41
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
42
42
|
end
|
43
43
|
################################################################################
|
44
|
-
def self.assert_equal
|
44
|
+
def self.assert_equal(lvalue, rvalue)
|
45
45
|
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
46
46
|
end
|
47
47
|
################################################################################
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -161,15 +161,16 @@ class PDF::Reader
|
|
161
161
|
end
|
162
162
|
|
163
163
|
def to_utf8_via_cmap(params)
|
164
|
-
|
164
|
+
case params
|
165
|
+
when Integer
|
165
166
|
[
|
166
167
|
@tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
|
167
168
|
].flatten.pack("U*")
|
168
|
-
|
169
|
+
when String
|
169
170
|
params.unpack(encoding.unpack).map { |c|
|
170
171
|
@tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
|
171
172
|
}.flatten.pack("U*")
|
172
|
-
|
173
|
+
when Array
|
173
174
|
params.collect { |param| to_utf8_via_cmap(param) }
|
174
175
|
else
|
175
176
|
params
|
@@ -181,11 +182,12 @@ class PDF::Reader
|
|
181
182
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
|
182
183
|
end
|
183
184
|
|
184
|
-
|
185
|
+
case params
|
186
|
+
when Integer
|
185
187
|
encoding.int_to_utf8_string(params)
|
186
|
-
|
188
|
+
when String
|
187
189
|
encoding.to_utf8(params)
|
188
|
-
|
190
|
+
when Array
|
189
191
|
params.collect { |param| to_utf8_via_encoding(param) }
|
190
192
|
else
|
191
193
|
params
|
@@ -81,16 +81,16 @@ class PDF::Reader
|
|
81
81
|
# h = GlyphHash.new
|
82
82
|
#
|
83
83
|
# h.unicode_to_name(65)
|
84
|
-
# => :A
|
84
|
+
# => [:A]
|
85
85
|
#
|
86
86
|
# h.unicode_to_name(8364)
|
87
|
-
# => :Euro
|
87
|
+
# => [:Euro]
|
88
88
|
#
|
89
89
|
# h.unicode_to_name(34)
|
90
|
-
# => :34
|
90
|
+
# => [:34]
|
91
91
|
#
|
92
92
|
def unicode_to_name(codepoint)
|
93
|
-
@by_codepoint[codepoint.to_i]
|
93
|
+
@by_codepoint[codepoint.to_i] || []
|
94
94
|
end
|
95
95
|
|
96
96
|
private
|
@@ -105,7 +105,7 @@ class PDF::Reader
|
|
105
105
|
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
106
106
|
File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
|
107
107
|
f.each do |l|
|
108
|
-
|
108
|
+
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
109
109
|
if name && code
|
110
110
|
cp = "0x#{code}".hex
|
111
111
|
keyed_by_name[name.to_sym] = cp
|
@@ -78,7 +78,7 @@ class PDF::Reader
|
|
78
78
|
|
79
79
|
if @cache.has_key?(key)
|
80
80
|
@cache[key]
|
81
|
-
elsif xref[key].is_a?(
|
81
|
+
elsif xref[key].is_a?(Integer)
|
82
82
|
buf = new_buffer(xref[key])
|
83
83
|
@cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
84
84
|
elsif xref[key].is_a?(PDF::Reader::Reference)
|
@@ -323,7 +323,7 @@ class PDF::Reader
|
|
323
323
|
|
324
324
|
def read_version
|
325
325
|
@io.seek(0)
|
326
|
-
|
326
|
+
_m, version = *@io.read(10).match(/PDF-(\d.\d)/)
|
327
327
|
@io.seek(0)
|
328
328
|
version.to_f
|
329
329
|
end
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -155,7 +155,7 @@ module PDF
|
|
155
155
|
|
156
156
|
# calls the name callback method on each receiver object with params as the arguments
|
157
157
|
#
|
158
|
-
def callback
|
158
|
+
def callback(receivers, name, params=[])
|
159
159
|
receivers.each do |receiver|
|
160
160
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
161
161
|
end
|
@@ -327,7 +327,6 @@ class PDF::Reader
|
|
327
327
|
glyph_width = ((w0 - (tj/1000.0)) * fs) * th
|
328
328
|
tx = glyph_width + ((tc + tw) * th)
|
329
329
|
end
|
330
|
-
ty = 0
|
331
330
|
|
332
331
|
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
333
332
|
# ctm[0] here, but this gets my tests green and I'm out of
|
@@ -282,7 +282,7 @@ class PDF::Reader
|
|
282
282
|
################################################################################
|
283
283
|
# Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
|
284
284
|
# its content
|
285
|
-
def walk_pages
|
285
|
+
def walk_pages(page)
|
286
286
|
|
287
287
|
# extract page content
|
288
288
|
if page[:Type] == :Pages
|
@@ -351,7 +351,7 @@ class PDF::Reader
|
|
351
351
|
# Reads a PDF content stream and calls all the appropriate callback methods for the operators
|
352
352
|
# it contains
|
353
353
|
#
|
354
|
-
def content_stream
|
354
|
+
def content_stream(instructions, fonts = {})
|
355
355
|
instructions = [instructions] unless instructions.kind_of?(Array)
|
356
356
|
instructions = instructions.map { |ins|
|
357
357
|
ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
|
@@ -399,7 +399,7 @@ class PDF::Reader
|
|
399
399
|
params << token
|
400
400
|
end
|
401
401
|
end
|
402
|
-
rescue EOFError
|
402
|
+
rescue EOFError
|
403
403
|
raise MalformedPDFError, "End Of File while processing a content stream"
|
404
404
|
end
|
405
405
|
################################################################################
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -60,7 +60,7 @@ class PDF::Reader
|
|
60
60
|
#
|
61
61
|
# buffer - a PDF::Reader::Buffer object that contains PDF data
|
62
62
|
# objects - a PDF::Reader::ObjectHash object that can return objects from the PDF file
|
63
|
-
def initialize
|
63
|
+
def initialize(buffer, objects=nil)
|
64
64
|
@buffer = buffer
|
65
65
|
@objects = objects
|
66
66
|
end
|
@@ -69,7 +69,7 @@ class PDF::Reader
|
|
69
69
|
# object
|
70
70
|
#
|
71
71
|
# operators - a hash of supported operators to read from the underlying buffer.
|
72
|
-
def parse_token
|
72
|
+
def parse_token(operators={})
|
73
73
|
token = @buffer.token
|
74
74
|
|
75
75
|
if STRATEGIES.has_key? token
|
@@ -93,7 +93,7 @@ class PDF::Reader
|
|
93
93
|
#
|
94
94
|
# id - the object ID to return
|
95
95
|
# gen - the object revision number to return
|
96
|
-
def object
|
96
|
+
def object(id, gen)
|
97
97
|
Error.assert_equal(parse_token, id)
|
98
98
|
Error.assert_equal(parse_token, gen)
|
99
99
|
Error.str_assert(parse_token, "obj")
|
@@ -198,7 +198,7 @@ class PDF::Reader
|
|
198
198
|
|
199
199
|
################################################################################
|
200
200
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
201
|
-
def stream
|
201
|
+
def stream(dict)
|
202
202
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
203
203
|
if @objects
|
204
204
|
length = @objects.deref(dict[:Length])
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -32,7 +32,7 @@ class PDF::Reader
|
|
32
32
|
attr_reader :id, :gen
|
33
33
|
################################################################################
|
34
34
|
# Create a new Reference to an object with the specified id and revision number
|
35
|
-
def initialize
|
35
|
+
def initialize(id, gen)
|
36
36
|
@id, @gen = id, gen
|
37
37
|
end
|
38
38
|
################################################################################
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -37,7 +37,7 @@ class PDF::Reader
|
|
37
37
|
################################################################################
|
38
38
|
# Creates a new stream with the specified dictionary and data. The dictionary
|
39
39
|
# should be a standard ruby hash, the data should be a standard ruby string.
|
40
|
-
def initialize
|
40
|
+
def initialize(hash, data)
|
41
41
|
@hash = hash
|
42
42
|
@data = data
|
43
43
|
@udata = nil
|
@@ -39,13 +39,13 @@ class PDF::Reader
|
|
39
39
|
class TextReceiver
|
40
40
|
################################################################################
|
41
41
|
# Initialize with the library user's receiver
|
42
|
-
def initialize
|
42
|
+
def initialize(main_receiver)
|
43
43
|
@main_receiver = main_receiver
|
44
44
|
@upper_corners = []
|
45
45
|
end
|
46
46
|
################################################################################
|
47
47
|
# Called when the document parsing begins
|
48
|
-
def begin_document
|
48
|
+
def begin_document(root)
|
49
49
|
@upper_corners = []
|
50
50
|
end
|
51
51
|
################################################################################
|
@@ -54,7 +54,7 @@ class PDF::Reader
|
|
54
54
|
@state.clear
|
55
55
|
end
|
56
56
|
################################################################################
|
57
|
-
def begin_page_container
|
57
|
+
def begin_page_container(page)
|
58
58
|
@upper_corners.push(media_box_check(page))
|
59
59
|
end
|
60
60
|
################################################################################
|
@@ -63,7 +63,7 @@ class PDF::Reader
|
|
63
63
|
end
|
64
64
|
################################################################################
|
65
65
|
# Called when new page parsing begins
|
66
|
-
def begin_page
|
66
|
+
def begin_page(info)
|
67
67
|
@page = info
|
68
68
|
|
69
69
|
@state = [{
|
@@ -101,29 +101,29 @@ class PDF::Reader
|
|
101
101
|
end
|
102
102
|
################################################################################
|
103
103
|
# PDF operator Tm
|
104
|
-
def set_text_matrix_and_text_line_matrix
|
104
|
+
def set_text_matrix_and_text_line_matrix(*args)
|
105
105
|
# these variable names look bad, but they're from the PDF spec
|
106
|
-
|
106
|
+
_a, _b, _c, _d, _e, f = *args
|
107
107
|
calculate_line_and_location(f)
|
108
108
|
end
|
109
109
|
################################################################################
|
110
110
|
# PDF operator Tc
|
111
|
-
def set_character_spacing
|
111
|
+
def set_character_spacing(n)
|
112
112
|
@state.last[:char_spacing] = n
|
113
113
|
end
|
114
114
|
################################################################################
|
115
115
|
# PDF operator Tw
|
116
|
-
def set_word_spacing
|
116
|
+
def set_word_spacing(n)
|
117
117
|
@state.last[:word_spacing] = n
|
118
118
|
end
|
119
119
|
################################################################################
|
120
120
|
# PDF operator Tz
|
121
|
-
def set_horizontal_text_scaling
|
121
|
+
def set_horizontal_text_scaling(n)
|
122
122
|
@state.last[:hori_scaling] = n/100
|
123
123
|
end
|
124
124
|
################################################################################
|
125
125
|
# PDF operator TL
|
126
|
-
def set_text_leading
|
126
|
+
def set_text_leading(n)
|
127
127
|
@state.last[:leading] = n
|
128
128
|
end
|
129
129
|
################################################################################
|
@@ -133,19 +133,19 @@ class PDF::Reader
|
|
133
133
|
end
|
134
134
|
################################################################################
|
135
135
|
# PDF operator Td
|
136
|
-
def move_text_position
|
136
|
+
def move_text_position(tx, ty)
|
137
137
|
#puts "#{tx} #{ty} Td"
|
138
138
|
calculate_line_and_location(@location + ty)
|
139
139
|
end
|
140
140
|
################################################################################
|
141
141
|
# PDF operator TD
|
142
|
-
def move_text_position_and_set_leading
|
142
|
+
def move_text_position_and_set_leading(tx, ty)
|
143
143
|
set_text_leading(ty)# * -1)
|
144
144
|
move_text_position(tx, ty)
|
145
145
|
end
|
146
146
|
################################################################################
|
147
147
|
# PDF operator Tj
|
148
|
-
def show_text
|
148
|
+
def show_text(string)
|
149
149
|
#puts "getting line #@line"
|
150
150
|
|
151
151
|
place = (@output[@line] ||= "")
|
@@ -157,7 +157,7 @@ class PDF::Reader
|
|
157
157
|
#puts "place is now: #{place}"
|
158
158
|
@written_to = true
|
159
159
|
end
|
160
|
-
def super_show_text
|
160
|
+
def super_show_text(string)
|
161
161
|
urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
|
162
162
|
ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR
|
163
163
|
|
@@ -193,12 +193,12 @@ class PDF::Reader
|
|
193
193
|
end
|
194
194
|
################################################################################
|
195
195
|
# PDF operator TJ
|
196
|
-
def show_text_with_positioning
|
196
|
+
def show_text_with_positioning(params)
|
197
197
|
prev_adjustment = @state.last[:tj_adjustment]
|
198
198
|
|
199
199
|
params.each do |p|
|
200
200
|
case p
|
201
|
-
when Float,
|
201
|
+
when Float, Integer
|
202
202
|
@state.last[:tj_adjustment] = p
|
203
203
|
else
|
204
204
|
show_text(p)
|
@@ -209,19 +209,19 @@ class PDF::Reader
|
|
209
209
|
end
|
210
210
|
################################################################################
|
211
211
|
# PDF operator '
|
212
|
-
def move_to_next_line_and_show_text
|
212
|
+
def move_to_next_line_and_show_text(string)
|
213
213
|
move_to_start_of_next_line
|
214
214
|
show_text(string)
|
215
215
|
end
|
216
216
|
################################################################################
|
217
217
|
# PDF operator "
|
218
|
-
def set_spacing_next_line_show_text
|
218
|
+
def set_spacing_next_line_show_text(aw, ac, string)
|
219
219
|
set_word_spacing(aw)
|
220
220
|
set_character_spacing(ac)
|
221
221
|
move_to_next_line_and_show_text(string)
|
222
222
|
end
|
223
223
|
################################################################################
|
224
|
-
def media_box_check
|
224
|
+
def media_box_check(dict)
|
225
225
|
corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup
|
226
226
|
|
227
227
|
if dict.has_key?(:MediaBox)
|
@@ -233,7 +233,7 @@ class PDF::Reader
|
|
233
233
|
corners
|
234
234
|
end
|
235
235
|
################################################################################
|
236
|
-
def calculate_line_and_location
|
236
|
+
def calculate_line_and_location(new_loc)
|
237
237
|
##puts "calculate_line_and_location(#{new_loc})"
|
238
238
|
key = new_loc; key.freeze
|
239
239
|
|
data/lib/pdf/reader/token.rb
CHANGED
@@ -33,7 +33,7 @@ class PDF::Reader
|
|
33
33
|
class Token < String # :nodoc:
|
34
34
|
################################################################################
|
35
35
|
# Creates a new token with the specified value
|
36
|
-
def initialize
|
36
|
+
def initialize(val)
|
37
37
|
super
|
38
38
|
end
|
39
39
|
################################################################################
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -53,7 +53,7 @@ class PDF::Reader
|
|
53
53
|
#
|
54
54
|
# io - must be an IO object, generally either a file or a StringIO
|
55
55
|
#
|
56
|
-
def initialize
|
56
|
+
def initialize(io)
|
57
57
|
@io = io
|
58
58
|
@junk_offset = calc_junk_offset(io) || 0
|
59
59
|
@xref = {}
|
@@ -219,7 +219,7 @@ class PDF::Reader
|
|
219
219
|
################################################################################
|
220
220
|
# Stores an offset value for a particular PDF object ID and revision number
|
221
221
|
#
|
222
|
-
def store
|
222
|
+
def store(id, gen, offset)
|
223
223
|
(@xref[id] ||= {})[gen] ||= offset
|
224
224
|
end
|
225
225
|
################################################################################
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '3.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '3.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: morecane
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -207,6 +207,7 @@ files:
|
|
207
207
|
- examples/extract_bates.rb
|
208
208
|
- examples/extract_fonts.rb
|
209
209
|
- examples/extract_images.rb
|
210
|
+
- examples/fuzzy_paragraphs.rb
|
210
211
|
- examples/hash.rb
|
211
212
|
- examples/metadata.rb
|
212
213
|
- examples/page_count.rb
|
@@ -325,7 +326,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
325
326
|
version: '0'
|
326
327
|
requirements: []
|
327
328
|
rubyforge_project:
|
328
|
-
rubygems_version: 2.5.
|
329
|
+
rubygems_version: 2.5.2
|
329
330
|
signing_key:
|
330
331
|
specification_version: 4
|
331
332
|
summary: A library for accessing the content of PDF files
|