pdf-reader 2.9.2 → 2.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/buffer.rb +1 -1
- data/lib/pdf/reader/cid_widths.rb +7 -5
- data/lib/pdf/reader/cmap.rb +1 -1
- data/lib/pdf/reader/encoding.rb +4 -4
- data/lib/pdf/reader/error.rb +0 -4
- data/lib/pdf/reader/filter/depredict.rb +2 -2
- data/lib/pdf/reader/font.rb +10 -11
- data/lib/pdf/reader/font_descriptor.rb +3 -1
- data/lib/pdf/reader/form_xobject.rb +4 -1
- data/lib/pdf/reader/glyph_hash.rb +13 -5
- data/lib/pdf/reader/lzw.rb +25 -10
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/object_hash.rb +15 -9
- data/lib/pdf/reader/object_stream.rb +3 -3
- data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
- data/lib/pdf/reader/page.rb +26 -7
- data/lib/pdf/reader/page_layout.rb +1 -1
- data/lib/pdf/reader/page_state.rb +7 -1
- data/lib/pdf/reader/page_text_receiver.rb +2 -0
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/reference.rb +3 -2
- data/lib/pdf/reader/resources.rb +3 -2
- data/lib/pdf/reader/stream.rb +1 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -1
- data/lib/pdf/reader/text_run.rb +5 -2
- data/lib/pdf/reader/transformation_matrix.rb +8 -8
- data/lib/pdf/reader/type_check.rb +46 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
- data/lib/pdf/reader/width_calculator/composite.rb +6 -2
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
- data/lib/pdf/reader/xref.rb +3 -3
- data/lib/pdf/reader.rb +5 -10
- data/rbi/pdf-reader.rbi +428 -358
- metadata +6 -5
data/lib/pdf/reader/transformation_matrix.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -51,7 +51,7 @@ class PDF::Reader
|
|
51
51
|
# displacement to speed up processing documents that use vertical
|
52
52
|
# writing systems
|
53
53
|
#
|
54
|
-
def multiply!(a,b
|
54
|
+
def multiply!(a,b,c, d,e,f)
|
55
55
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
56
56
|
# the identity matrix, no effect
|
57
57
|
self
|
@@ -164,12 +164,12 @@ class PDF::Reader
|
|
164
164
|
# [ e f 1 ] [ e f 1 ]
|
165
165
|
#
|
166
166
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
167
|
-
newa = (@a * a2) + (@b * c2) + (
|
168
|
-
newb = (@a * b2) + (@b * d2) + (
|
169
|
-
newc = (@c * a2) + (@d * c2) + (
|
170
|
-
newd = (@c * b2) + (@d * d2) + (
|
171
|
-
newe = (@e * a2) + (@f * c2) + (
|
172
|
-
newf = (@e * b2) + (@f * d2) + (
|
167
|
+
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
|
+
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
169
|
+
newc = (@c * a2) + (@d * c2) + (e2 * 0)
|
170
|
+
newd = (@c * b2) + (@d * d2) + (f2 * 0)
|
171
|
+
newe = (@e * a2) + (@f * c2) + (e2 * 1)
|
172
|
+
newf = (@e * b2) + (@f * d2) + (f2 * 1)
|
173
173
|
@a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
|
174
174
|
end
|
175
175
|
|
data/lib/pdf/reader/type_check.rb
CHANGED
@@ -9,6 +9,18 @@ module PDF
|
|
9
9
|
#
|
10
10
|
class TypeCheck
|
11
11
|
|
12
|
+
def self.cast_to_int!(obj)
|
13
|
+
if obj.is_a?(Integer)
|
14
|
+
obj
|
15
|
+
elsif obj.nil?
|
16
|
+
0
|
17
|
+
elsif obj.respond_to?(:to_i)
|
18
|
+
obj.to_i
|
19
|
+
else
|
20
|
+
raise MalformedPDFError, "Unable to cast to integer"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
12
24
|
def self.cast_to_numeric!(obj)
|
13
25
|
if obj.is_a?(Numeric)
|
14
26
|
obj
|
@@ -46,6 +58,40 @@ module PDF
|
|
46
58
|
raise MalformedPDFError, "Unable to cast to symbol"
|
47
59
|
end
|
48
60
|
end
|
61
|
+
|
62
|
+
def self.cast_to_symbol!(obj)
|
63
|
+
res = cast_to_symbol(obj)
|
64
|
+
if res
|
65
|
+
res
|
66
|
+
else
|
67
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.cast_to_pdf_dict!(obj)
|
72
|
+
if obj.is_a?(Hash)
|
73
|
+
obj
|
74
|
+
elsif obj.respond_to?(:to_h)
|
75
|
+
obj.to_h
|
76
|
+
else
|
77
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.cast_to_pdf_dict_with_stream_values!(obj)
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
result = Hash.new
|
84
|
+
obj.each do |k, v|
|
85
|
+
raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
|
86
|
+
result[cast_to_symbol!(k)] = v
|
87
|
+
end
|
88
|
+
result
|
89
|
+
elsif obj.respond_to?(:to_h)
|
90
|
+
cast_to_pdf_dict_with_stream_values!(obj.to_h)
|
91
|
+
else
|
92
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
93
|
+
end
|
94
|
+
end
|
49
95
|
end
|
50
96
|
end
|
51
97
|
end
|
data/lib/pdf/reader/width_calculator/built_in.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
require 'afm'
|
@@ -53,12 +53,13 @@ class PDF::Reader
|
|
53
53
|
private
|
54
54
|
|
55
55
|
def control_character?(code_point)
|
56
|
-
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
56
|
+
match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
57
|
+
match ? true : false
|
57
58
|
end
|
58
59
|
|
59
60
|
def extract_basefont(font_name)
|
60
61
|
if BUILTINS.include?(font_name)
|
61
|
-
font_name
|
62
|
+
font_name.to_s
|
62
63
|
else
|
63
64
|
"Times-Roman"
|
64
65
|
end
|
data/lib/pdf/reader/width_calculator/composite.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -22,7 +22,11 @@ class PDF::Reader
|
|
22
22
|
|
23
23
|
w = @widths[code_point]
|
24
24
|
# 0 is a valid width
|
25
|
-
|
25
|
+
if w
|
26
|
+
w.to_f
|
27
|
+
else
|
28
|
+
0
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
data/lib/pdf/reader/width_calculator/true_type.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -10,8 +10,8 @@ class PDF::Reader
|
|
10
10
|
def initialize(font)
|
11
11
|
@font = font
|
12
12
|
|
13
|
-
if @font.font_descriptor
|
14
|
-
@missing_width = @font.font_descriptor.missing_width
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
15
15
|
else
|
16
16
|
@missing_width = 0
|
17
17
|
end
|
@@ -30,25 +30,23 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
32
32
|
# which is undesirable in this case.
|
33
|
-
|
34
|
-
|
33
|
+
first_char = @font.first_char
|
34
|
+
if first_char && first_char <= code_point
|
35
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
35
36
|
else
|
36
37
|
@missing_width.to_f
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
41
|
def glyph_width_from_descriptor(code_point)
|
41
|
-
return unless @font.font_descriptor
|
42
|
-
|
43
42
|
# true type fonts will have most of their information contained
|
44
43
|
# with-in a program inside the font descriptor, however the widths
|
45
44
|
# may not be in standard PDF glyph widths (1000 units => 1 text space unit)
|
46
45
|
# so this width will need to be scaled
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
nil
|
46
|
+
if fd = @font.font_descriptor
|
47
|
+
if w = fd.glyph_width(code_point)
|
48
|
+
w.to_f * fd.glyph_to_pdf_scale_factor.to_f
|
49
|
+
end
|
52
50
|
end
|
53
51
|
end
|
54
52
|
end
|
data/lib/pdf/reader/width_calculator/type_one_or_three.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -10,8 +10,8 @@ class PDF::Reader
|
|
10
10
|
def initialize(font)
|
11
11
|
@font = font
|
12
12
|
|
13
|
-
if @font.font_descriptor
|
14
|
-
@missing_width = @font.font_descriptor.missing_width
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
15
15
|
else
|
16
16
|
@missing_width = 0
|
17
17
|
end
|
@@ -23,8 +23,9 @@ class PDF::Reader
|
|
23
23
|
|
24
24
|
# in ruby a negative index is valid, and will go from the end of the array
|
25
25
|
# which is undesirable in this case.
|
26
|
-
|
27
|
-
|
26
|
+
first_char = @font.first_char
|
27
|
+
if first_char && first_char <= code_point
|
28
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
28
29
|
else
|
29
30
|
@missing_width.to_f
|
30
31
|
end
|
data/lib/pdf/reader/width_calculator/type_zero.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -13,13 +13,16 @@ class PDF::Reader
|
|
13
13
|
|
14
14
|
def initialize(font)
|
15
15
|
@font = font
|
16
|
-
@descendant_font = @font.descendantfonts.first
|
17
16
|
end
|
18
17
|
|
19
18
|
def glyph_width(code_point)
|
20
19
|
return 0 if code_point.nil? || code_point < 0
|
21
20
|
|
22
|
-
@
|
21
|
+
if descendant_font = @font.descendantfonts.first
|
22
|
+
descendant_font.glyph_width(code_point).to_f
|
23
|
+
else
|
24
|
+
0
|
25
|
+
end
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -73,7 +73,7 @@ class PDF::Reader
|
|
73
73
|
#
|
74
74
|
# ref - a PDF::Reader::Reference object containing an object ID and revision number
|
75
75
|
def [](ref)
|
76
|
-
@xref
|
76
|
+
@xref.fetch(ref.id, {}).fetch(ref.gen)
|
77
77
|
rescue
|
78
78
|
raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
|
79
79
|
end
|
@@ -82,8 +82,8 @@ class PDF::Reader
|
|
82
82
|
def each(&block)
|
83
83
|
ids = @xref.keys.sort
|
84
84
|
ids.each do |id|
|
85
|
-
gen = @xref
|
86
|
-
yield PDF::Reader::Reference.new(id, gen)
|
85
|
+
gen = @xref.fetch(id, {}).keys.sort[-1]
|
86
|
+
yield PDF::Reader::Reference.new(id, gen.to_i)
|
87
87
|
end
|
88
88
|
end
|
89
89
|
################################################################################
|
data/lib/pdf/reader.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -128,7 +128,7 @@ module PDF
|
|
128
128
|
doc_strings_to_utf8(dict)
|
129
129
|
end
|
130
130
|
|
131
|
-
# Return a
|
131
|
+
# Return a String with extra XML metadata provided by the author of the PDF file. Not
|
132
132
|
# always present.
|
133
133
|
#
|
134
134
|
def metadata
|
@@ -182,7 +182,7 @@ module PDF
|
|
182
182
|
#
|
183
183
|
# reader.pages.each do |page|
|
184
184
|
# puts page.fonts
|
185
|
-
# puts page.
|
185
|
+
# puts page.rectangles
|
186
186
|
# puts page.text
|
187
187
|
# end
|
188
188
|
#
|
@@ -272,13 +272,7 @@ module PDF
|
|
272
272
|
end
|
273
273
|
|
274
274
|
def root
|
275
|
-
@root ||=
|
276
|
-
obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
|
277
|
-
unless obj.kind_of?(::Hash)
|
278
|
-
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
279
|
-
end
|
280
|
-
obj
|
281
|
-
end
|
275
|
+
@root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
|
282
276
|
end
|
283
277
|
|
284
278
|
end
|
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
|
|
315
309
|
require 'pdf/reader/rectangle'
|
316
310
|
require 'pdf/reader/reference'
|
317
311
|
require 'pdf/reader/register_receiver'
|
312
|
+
require 'pdf/reader/no_text_filter'
|
318
313
|
require 'pdf/reader/null_security_handler'
|
319
314
|
require 'pdf/reader/security_handler_factory'
|
320
315
|
require 'pdf/reader/standard_key_builder'
|