pdf-reader 2.9.2 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/buffer.rb +1 -1
- data/lib/pdf/reader/cid_widths.rb +7 -5
- data/lib/pdf/reader/cmap.rb +1 -1
- data/lib/pdf/reader/encoding.rb +4 -4
- data/lib/pdf/reader/error.rb +0 -4
- data/lib/pdf/reader/filter/depredict.rb +2 -2
- data/lib/pdf/reader/font.rb +10 -11
- data/lib/pdf/reader/font_descriptor.rb +3 -1
- data/lib/pdf/reader/form_xobject.rb +4 -1
- data/lib/pdf/reader/glyph_hash.rb +13 -5
- data/lib/pdf/reader/lzw.rb +25 -10
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/object_hash.rb +15 -9
- data/lib/pdf/reader/object_stream.rb +3 -3
- data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
- data/lib/pdf/reader/page.rb +26 -7
- data/lib/pdf/reader/page_layout.rb +1 -1
- data/lib/pdf/reader/page_state.rb +7 -1
- data/lib/pdf/reader/page_text_receiver.rb +2 -0
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/reference.rb +3 -2
- data/lib/pdf/reader/resources.rb +3 -2
- data/lib/pdf/reader/stream.rb +1 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -1
- data/lib/pdf/reader/text_run.rb +5 -2
- data/lib/pdf/reader/transformation_matrix.rb +8 -8
- data/lib/pdf/reader/type_check.rb +46 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
- data/lib/pdf/reader/width_calculator/composite.rb +6 -2
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
- data/lib/pdf/reader/xref.rb +3 -3
- data/lib/pdf/reader.rb +5 -10
- data/rbi/pdf-reader.rbi +428 -358
- metadata +6 -5
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -51,7 +51,7 @@ class PDF::Reader
|
|
51
51
|
# displacement to speed up processing documents that use vertical
|
52
52
|
# writing systems
|
53
53
|
#
|
54
|
-
def multiply!(a,b
|
54
|
+
def multiply!(a,b,c, d,e,f)
|
55
55
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
56
56
|
# the identity matrix, no effect
|
57
57
|
self
|
@@ -164,12 +164,12 @@ class PDF::Reader
|
|
164
164
|
# [ e f 1 ] [ e f 1 ]
|
165
165
|
#
|
166
166
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
167
|
-
newa = (@a * a2) + (@b * c2) + (
|
168
|
-
newb = (@a * b2) + (@b * d2) + (
|
169
|
-
newc = (@c * a2) + (@d * c2) + (
|
170
|
-
newd = (@c * b2) + (@d * d2) + (
|
171
|
-
newe = (@e * a2) + (@f * c2) + (
|
172
|
-
newf = (@e * b2) + (@f * d2) + (
|
167
|
+
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
|
+
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
169
|
+
newc = (@c * a2) + (@d * c2) + (e2 * 0)
|
170
|
+
newd = (@c * b2) + (@d * d2) + (f2 * 0)
|
171
|
+
newe = (@e * a2) + (@f * c2) + (e2 * 1)
|
172
|
+
newf = (@e * b2) + (@f * d2) + (f2 * 1)
|
173
173
|
@a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
|
174
174
|
end
|
175
175
|
|
@@ -9,6 +9,18 @@ module PDF
|
|
9
9
|
#
|
10
10
|
class TypeCheck
|
11
11
|
|
12
|
+
def self.cast_to_int!(obj)
|
13
|
+
if obj.is_a?(Integer)
|
14
|
+
obj
|
15
|
+
elsif obj.nil?
|
16
|
+
0
|
17
|
+
elsif obj.respond_to?(:to_i)
|
18
|
+
obj.to_i
|
19
|
+
else
|
20
|
+
raise MalformedPDFError, "Unable to cast to integer"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
12
24
|
def self.cast_to_numeric!(obj)
|
13
25
|
if obj.is_a?(Numeric)
|
14
26
|
obj
|
@@ -46,6 +58,40 @@ module PDF
|
|
46
58
|
raise MalformedPDFError, "Unable to cast to symbol"
|
47
59
|
end
|
48
60
|
end
|
61
|
+
|
62
|
+
def self.cast_to_symbol!(obj)
|
63
|
+
res = cast_to_symbol(obj)
|
64
|
+
if res
|
65
|
+
res
|
66
|
+
else
|
67
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.cast_to_pdf_dict!(obj)
|
72
|
+
if obj.is_a?(Hash)
|
73
|
+
obj
|
74
|
+
elsif obj.respond_to?(:to_h)
|
75
|
+
obj.to_h
|
76
|
+
else
|
77
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.cast_to_pdf_dict_with_stream_values!(obj)
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
result = Hash.new
|
84
|
+
obj.each do |k, v|
|
85
|
+
raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
|
86
|
+
result[cast_to_symbol!(k)] = v
|
87
|
+
end
|
88
|
+
result
|
89
|
+
elsif obj.respond_to?(:to_h)
|
90
|
+
cast_to_pdf_dict_with_stream_values!(obj.to_h)
|
91
|
+
else
|
92
|
+
raise MalformedPDFError, "Unable to cast to hash"
|
93
|
+
end
|
94
|
+
end
|
49
95
|
end
|
50
96
|
end
|
51
97
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
require 'afm'
|
@@ -53,12 +53,13 @@ class PDF::Reader
|
|
53
53
|
private
|
54
54
|
|
55
55
|
def control_character?(code_point)
|
56
|
-
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
56
|
+
match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
57
|
+
match ? true : false
|
57
58
|
end
|
58
59
|
|
59
60
|
def extract_basefont(font_name)
|
60
61
|
if BUILTINS.include?(font_name)
|
61
|
-
font_name
|
62
|
+
font_name.to_s
|
62
63
|
else
|
63
64
|
"Times-Roman"
|
64
65
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -22,7 +22,11 @@ class PDF::Reader
|
|
22
22
|
|
23
23
|
w = @widths[code_point]
|
24
24
|
# 0 is a valid width
|
25
|
-
|
25
|
+
if w
|
26
|
+
w.to_f
|
27
|
+
else
|
28
|
+
0
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -10,8 +10,8 @@ class PDF::Reader
|
|
10
10
|
def initialize(font)
|
11
11
|
@font = font
|
12
12
|
|
13
|
-
if @font.font_descriptor
|
14
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
15
15
|
else
|
16
16
|
@missing_width = 0
|
17
17
|
end
|
@@ -30,25 +30,23 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
32
32
|
# which is undesireable in this case.
|
33
|
-
|
34
|
-
|
33
|
+
first_char = @font.first_char
|
34
|
+
if first_char && first_char <= code_point
|
35
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
35
36
|
else
|
36
37
|
@missing_width.to_f
|
37
38
|
end
|
38
39
|
end
|
39
40
|
|
40
41
|
def glyph_width_from_descriptor(code_point)
|
41
|
-
return unless @font.font_descriptor
|
42
|
-
|
43
42
|
# true type fonts will have most of their information contained
|
44
43
|
# with-in a program inside the font descriptor, however the widths
|
45
44
|
# may not be in standard PDF glyph widths (1000 units => 1 text space unit)
|
46
45
|
# so this width will need to be scaled
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
nil
|
46
|
+
if fd = @font.font_descriptor
|
47
|
+
if w = fd.glyph_width(code_point)
|
48
|
+
w.to_f * fd.glyph_to_pdf_scale_factor.to_f
|
49
|
+
end
|
52
50
|
end
|
53
51
|
end
|
54
52
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -10,8 +10,8 @@ class PDF::Reader
|
|
10
10
|
def initialize(font)
|
11
11
|
@font = font
|
12
12
|
|
13
|
-
if @font.font_descriptor
|
14
|
-
@missing_width =
|
13
|
+
if fd = @font.font_descriptor
|
14
|
+
@missing_width = fd.missing_width
|
15
15
|
else
|
16
16
|
@missing_width = 0
|
17
17
|
end
|
@@ -23,8 +23,9 @@ class PDF::Reader
|
|
23
23
|
|
24
24
|
# in ruby a negative index is valid, and will go from the end of the array
|
25
25
|
# which is undesireable in this case.
|
26
|
-
|
27
|
-
|
26
|
+
first_char = @font.first_char
|
27
|
+
if first_char && first_char <= code_point
|
28
|
+
@font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
|
28
29
|
else
|
29
30
|
@missing_width.to_f
|
30
31
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -13,13 +13,16 @@ class PDF::Reader
|
|
13
13
|
|
14
14
|
def initialize(font)
|
15
15
|
@font = font
|
16
|
-
@descendant_font = @font.descendantfonts.first
|
17
16
|
end
|
18
17
|
|
19
18
|
def glyph_width(code_point)
|
20
19
|
return 0 if code_point.nil? || code_point < 0
|
21
20
|
|
22
|
-
@
|
21
|
+
if descendant_font = @font.descendantfonts.first
|
22
|
+
descendant_font.glyph_width(code_point).to_f
|
23
|
+
else
|
24
|
+
0
|
25
|
+
end
|
23
26
|
end
|
24
27
|
end
|
25
28
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -73,7 +73,7 @@ class PDF::Reader
|
|
73
73
|
#
|
74
74
|
# ref - a PDF::Reader::Reference object containing an object ID and revision number
|
75
75
|
def [](ref)
|
76
|
-
@xref
|
76
|
+
@xref.fetch(ref.id, {}).fetch(ref.gen)
|
77
77
|
rescue
|
78
78
|
raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
|
79
79
|
end
|
@@ -82,8 +82,8 @@ class PDF::Reader
|
|
82
82
|
def each(&block)
|
83
83
|
ids = @xref.keys.sort
|
84
84
|
ids.each do |id|
|
85
|
-
gen = @xref
|
86
|
-
yield PDF::Reader::Reference.new(id, gen)
|
85
|
+
gen = @xref.fetch(id, {}).keys.sort[-1]
|
86
|
+
yield PDF::Reader::Reference.new(id, gen.to_i)
|
87
87
|
end
|
88
88
|
end
|
89
89
|
################################################################################
|
data/lib/pdf/reader.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -128,7 +128,7 @@ module PDF
|
|
128
128
|
doc_strings_to_utf8(dict)
|
129
129
|
end
|
130
130
|
|
131
|
-
# Return a
|
131
|
+
# Return a String with extra XML metadata provided by the author of the PDF file. Not
|
132
132
|
# always present.
|
133
133
|
#
|
134
134
|
def metadata
|
@@ -182,7 +182,7 @@ module PDF
|
|
182
182
|
#
|
183
183
|
# reader.pages.each do |page|
|
184
184
|
# puts page.fonts
|
185
|
-
# puts page.
|
185
|
+
# puts page.rectangles
|
186
186
|
# puts page.text
|
187
187
|
# end
|
188
188
|
#
|
@@ -272,13 +272,7 @@ module PDF
|
|
272
272
|
end
|
273
273
|
|
274
274
|
def root
|
275
|
-
@root ||=
|
276
|
-
obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
|
277
|
-
unless obj.kind_of?(::Hash)
|
278
|
-
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
279
|
-
end
|
280
|
-
obj
|
281
|
-
end
|
275
|
+
@root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
|
282
276
|
end
|
283
277
|
|
284
278
|
end
|
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
|
|
315
309
|
require 'pdf/reader/rectangle'
|
316
310
|
require 'pdf/reader/reference'
|
317
311
|
require 'pdf/reader/register_receiver'
|
312
|
+
require 'pdf/reader/no_text_filter'
|
318
313
|
require 'pdf/reader/null_security_handler'
|
319
314
|
require 'pdf/reader/security_handler_factory'
|
320
315
|
require 'pdf/reader/standard_key_builder'
|