pdf-reader 2.5.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +42 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +90 -46
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +19 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +2 -1
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +36 -37
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +23 -12
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +27 -4
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +21 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -68,7 +69,7 @@ class PDF::Reader
|
|
68
69
|
#
|
69
70
|
# [25, :A, :B]
|
70
71
|
def differences=(diff)
|
71
|
-
|
72
|
+
PDF::Reader::Error.validate_type(diff, "diff", Array)
|
72
73
|
|
73
74
|
@differences = {}
|
74
75
|
byte = 0
|
@@ -208,7 +209,7 @@ class PDF::Reader
|
|
208
209
|
def load_mapping(file)
|
209
210
|
File.open(file, "r:BINARY") do |f|
|
210
211
|
f.each do |l|
|
211
|
-
_m, single_byte, unicode = *l.match(
|
212
|
+
_m, single_byte, unicode = *l.match(/\A([0-9A-Za-z]+);([0-9A-F]{4})/)
|
212
213
|
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
213
214
|
end
|
214
215
|
end
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -33,19 +34,34 @@ class PDF::Reader
|
|
33
34
|
def self.str_assert(lvalue, rvalue, chars=nil)
|
34
35
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
35
36
|
lvalue = lvalue[0,chars] if chars
|
36
|
-
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
37
|
+
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
37
38
|
end
|
38
39
|
################################################################################
|
39
40
|
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
40
41
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
41
42
|
lvalue = lvalue[0,chars] if chars
|
42
|
-
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
43
|
+
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
|
43
44
|
end
|
44
45
|
################################################################################
|
45
46
|
def self.assert_equal(lvalue, rvalue)
|
46
|
-
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
47
|
+
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
47
48
|
end
|
48
49
|
################################################################################
|
50
|
+
def self.validate_type(object, name, klass)
|
51
|
+
raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
52
|
+
end
|
53
|
+
################################################################################
|
54
|
+
def self.validate_type_as_malformed(object, name, klass)
|
55
|
+
raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
56
|
+
end
|
57
|
+
################################################################################
|
58
|
+
def self.validate_not_nil(object, name)
|
59
|
+
raise ArgumentError, "#{object} must not be nil" if object.nil?
|
60
|
+
end
|
61
|
+
################################################################################
|
62
|
+
def self.validate_not_nil_as_malformed(object, name)
|
63
|
+
raise MalformedPDFError, "#{object} must not be nil" if object.nil?
|
64
|
+
end
|
49
65
|
end
|
50
66
|
|
51
67
|
################################################################################
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'ascii85'
|
@@ -7,6 +8,7 @@ class PDF::Reader
|
|
7
8
|
module Filter # :nodoc:
|
8
9
|
# implementation of the Ascii85 filter
|
9
10
|
class Ascii85
|
11
|
+
|
10
12
|
def initialize(options = {})
|
11
13
|
@options = options
|
12
14
|
end
|
@@ -17,7 +19,11 @@ class PDF::Reader
|
|
17
19
|
#
|
18
20
|
def filter(data)
|
19
21
|
data = "<~#{data}" unless data.to_s[0,2] == "<~"
|
20
|
-
::
|
22
|
+
if defined?(::Ascii85Native)
|
23
|
+
::Ascii85Native::decode(data)
|
24
|
+
else
|
25
|
+
::Ascii85::decode(data)
|
26
|
+
end
|
21
27
|
rescue Exception => e
|
22
28
|
# Oops, there was a problem decoding the stream
|
23
29
|
raise MalformedPDFError,
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
#
|
@@ -6,6 +7,7 @@ class PDF::Reader
|
|
6
7
|
module Filter # :nodoc:
|
7
8
|
# implementation of the AsciiHex stream filter
|
8
9
|
class AsciiHex
|
10
|
+
|
9
11
|
def initialize(options = {})
|
10
12
|
@options = options
|
11
13
|
end
|
@@ -16,9 +18,12 @@ class PDF::Reader
|
|
16
18
|
def filter(data)
|
17
19
|
data.chop! if data[-1,1] == ">"
|
18
20
|
data = data[1,data.size] if data[0,1] == "<"
|
21
|
+
|
22
|
+
return "" if data.nil?
|
23
|
+
|
19
24
|
data.gsub!(/[^A-Fa-f0-9]/,"")
|
20
25
|
data << "0" if data.size % 2 == 1
|
21
|
-
data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
|
26
|
+
data.scan(/.{2}/).flatten.map { |s| s.hex.chr }.join("")
|
22
27
|
rescue Exception => e
|
23
28
|
# Oops, there was a problem decoding the stream
|
24
29
|
raise MalformedPDFError,
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -6,8 +7,9 @@ class PDF::Reader
|
|
6
7
|
# some filter implementations support preprocessing of the data to
|
7
8
|
# improve compression
|
8
9
|
class Depredict
|
10
|
+
|
9
11
|
def initialize(options = {})
|
10
|
-
@options = options
|
12
|
+
@options = options
|
11
13
|
end
|
12
14
|
|
13
15
|
################################################################################
|
@@ -34,7 +36,7 @@ class PDF::Reader
|
|
34
36
|
################################################################################
|
35
37
|
def tiff_depredict(data)
|
36
38
|
data = data.unpack("C*")
|
37
|
-
unfiltered =
|
39
|
+
unfiltered = ''
|
38
40
|
bpc = @options[:BitsPerComponent] || 8
|
39
41
|
pixel_bits = bpc * @options[:Colors]
|
40
42
|
pixel_bytes = pixel_bits / 8
|
@@ -51,11 +53,11 @@ class PDF::Reader
|
|
51
53
|
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
52
54
|
row_data[index] = (byte + left) % 256
|
53
55
|
end
|
54
|
-
unfiltered += row_data
|
56
|
+
unfiltered += row_data.pack("C*")
|
55
57
|
pos += line_len
|
56
58
|
end
|
57
59
|
|
58
|
-
unfiltered
|
60
|
+
unfiltered
|
59
61
|
end
|
60
62
|
################################################################################
|
61
63
|
def png_depredict(data)
|
@@ -67,7 +69,7 @@ class PDF::Reader
|
|
67
69
|
scanline_length = (pixel_bytes * @options[:Columns]) + 1
|
68
70
|
row = 0
|
69
71
|
pixels = []
|
70
|
-
paeth, pa, pb, pc =
|
72
|
+
paeth, pa, pb, pc = 0, 0, 0, 0
|
71
73
|
until data.empty? do
|
72
74
|
row_data = data.slice! 0, scanline_length
|
73
75
|
filter = row_data.shift
|
@@ -94,17 +96,17 @@ class PDF::Reader
|
|
94
96
|
row_data[index] = (byte + ((left + upper)/2).floor) % 256
|
95
97
|
end
|
96
98
|
when 4 # Paeth
|
97
|
-
left = upper = upper_left =
|
99
|
+
left = upper = upper_left = 0
|
98
100
|
row_data.each_with_index do |byte, index|
|
99
101
|
col = index / pixel_bytes
|
100
102
|
|
101
|
-
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
103
|
+
left = index < pixel_bytes ? 0 : Integer(row_data[index - pixel_bytes])
|
102
104
|
if row.zero?
|
103
105
|
upper = upper_left = 0
|
104
106
|
else
|
105
|
-
upper = pixels[row-1][col][index % pixel_bytes]
|
107
|
+
upper = Integer(pixels[row-1][col][index % pixel_bytes])
|
106
108
|
upper_left = col.zero? ? 0 :
|
107
|
-
pixels[row-1][col-1][index % pixel_bytes]
|
109
|
+
Integer(pixels[row-1][col-1][index % pixel_bytes])
|
108
110
|
end
|
109
111
|
|
110
112
|
p = left + upper - upper_left
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
|
@@ -8,6 +9,7 @@ class PDF::Reader
|
|
8
9
|
module Filter # :nodoc:
|
9
10
|
# implementation of the Flate (zlib) stream filter
|
10
11
|
class Flate
|
12
|
+
|
11
13
|
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
14
|
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
15
|
|
@@ -32,7 +34,7 @@ class PDF::Reader
|
|
32
34
|
def zlib_inflate(data)
|
33
35
|
begin
|
34
36
|
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
-
rescue Zlib::
|
37
|
+
rescue Zlib::Error
|
36
38
|
# by default, Ruby's Zlib assumes the data it's inflating
|
37
39
|
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
40
|
# fails, swallow the exception and attempt to inflate the data as a raw
|
@@ -41,7 +43,7 @@ class PDF::Reader
|
|
41
43
|
|
42
44
|
begin
|
43
45
|
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
-
rescue
|
46
|
+
rescue Zlib::Error
|
45
47
|
# swallow this one too, so we can try some other fallback options
|
46
48
|
end
|
47
49
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
#
|
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
|
|
6
7
|
module Filter # :nodoc:
|
7
8
|
# implementation of the run length stream filter
|
8
9
|
class RunLength
|
10
|
+
|
9
11
|
def initialize(options = {})
|
10
12
|
@options = options
|
11
13
|
end
|
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
|
|
20
22
|
length = data.getbyte(pos)
|
21
23
|
pos += 1
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
25
|
+
unless length.nil?
|
26
|
+
case
|
27
|
+
# nothing
|
28
|
+
when length == 128
|
29
|
+
break
|
30
|
+
when length < 128
|
31
|
+
# When the length is < 128, we copy the following length+1 bytes
|
32
|
+
# literally.
|
33
|
+
out << data[pos, length + 1]
|
34
|
+
pos += length
|
35
|
+
else
|
36
|
+
# When the length is > 128, we copy the next byte (257 - length)
|
37
|
+
# times; i.e., "\xFA\x00" ([250, 0]) will expand to
|
38
|
+
# "\x00\x00\x00\x00\x00\x00\x00".
|
39
|
+
previous_byte = data[pos, 1] || ""
|
40
|
+
out << previous_byte * (257 - length)
|
41
|
+
end
|
36
42
|
end
|
37
43
|
|
38
44
|
pos += 1
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -41,7 +42,7 @@ class PDF::Reader
|
|
41
42
|
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
42
43
|
#
|
43
44
|
def self.with(name, options = {})
|
44
|
-
case name
|
45
|
+
case name
|
45
46
|
when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
|
46
47
|
when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
|
47
48
|
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -42,6 +43,7 @@ class PDF::Reader
|
|
42
43
|
@tounicode = nil
|
43
44
|
|
44
45
|
extract_base_info(obj)
|
46
|
+
extract_type3_info(obj)
|
45
47
|
extract_descriptor(obj)
|
46
48
|
extract_descendants(obj)
|
47
49
|
@width_calc = build_width_calculator
|
@@ -72,8 +74,44 @@ class PDF::Reader
|
|
72
74
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
73
75
|
end
|
74
76
|
|
77
|
+
# In most cases glyph width is converted into text space with a simple divide by 1000.
|
78
|
+
#
|
79
|
+
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
|
+
#
|
81
|
+
def glyph_width_in_text_space(code_point)
|
82
|
+
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
|
+
|
84
|
+
if @subtype == :Type3
|
85
|
+
x1, y1 = font_matrix_transform(0,0)
|
86
|
+
x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
|
+
(x2 - x1).abs.round(2)
|
88
|
+
else
|
89
|
+
glyph_width_in_glyph_space / 1000.0
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
75
93
|
private
|
76
94
|
|
95
|
+
# Only valid for Type3 fonts
|
96
|
+
def font_matrix_transform(x, y)
|
97
|
+
return x, y if @font_matrix.nil?
|
98
|
+
|
99
|
+
matrix = TransformationMatrix.new(
|
100
|
+
@font_matrix[0], @font_matrix[1],
|
101
|
+
@font_matrix[2], @font_matrix[3],
|
102
|
+
@font_matrix[4], @font_matrix[5],
|
103
|
+
)
|
104
|
+
|
105
|
+
if x == 0 && y == 0
|
106
|
+
[matrix.e, matrix.f]
|
107
|
+
else
|
108
|
+
[
|
109
|
+
(matrix.a * x) + (matrix.c * y) + (matrix.e),
|
110
|
+
(matrix.b * x) + (matrix.d * y) + (matrix.f)
|
111
|
+
]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
77
115
|
def default_encoding(font_name)
|
78
116
|
case font_name.to_s
|
79
117
|
when "Symbol" then
|
@@ -111,37 +149,55 @@ class PDF::Reader
|
|
111
149
|
end
|
112
150
|
end
|
113
151
|
|
114
|
-
def
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
152
|
+
def build_encoding(obj)
|
153
|
+
if obj[:Encoding].is_a?(Symbol)
|
154
|
+
# one of the standard encodings, referenced by name
|
155
|
+
# TODO pass in a standard shape, always a Hash
|
156
|
+
PDF::Reader::Encoding.new(obj[:Encoding])
|
157
|
+
elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
|
158
|
+
PDF::Reader::Encoding.new(obj[:Encoding])
|
159
|
+
elsif obj[:Encoding].nil?
|
160
|
+
default_encoding(@basefont)
|
119
161
|
else
|
120
|
-
|
162
|
+
raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
|
121
163
|
end
|
122
|
-
|
123
|
-
|
124
|
-
|
164
|
+
end
|
165
|
+
|
166
|
+
def extract_base_info(obj)
|
167
|
+
@subtype = @ohash.deref_name(obj[:Subtype])
|
168
|
+
@basefont = @ohash.deref_name(obj[:BaseFont])
|
169
|
+
@encoding = build_encoding(obj)
|
170
|
+
@widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
|
171
|
+
@first_char = @ohash.deref_integer(obj[:FirstChar])
|
172
|
+
@last_char = @ohash.deref_integer(obj[:LastChar])
|
125
173
|
|
126
174
|
# CID Fonts are not required to have a W or DW entry, if they don't exist,
|
127
175
|
# the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
|
128
|
-
@cid_widths = @ohash.
|
129
|
-
@cid_default_width = @ohash.
|
176
|
+
@cid_widths = @ohash.deref_array(obj[:W]) || []
|
177
|
+
@cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
|
130
178
|
|
131
179
|
if obj[:ToUnicode]
|
132
180
|
# ToUnicode is optional for Type1 and Type3
|
133
|
-
stream = @ohash.
|
134
|
-
if stream
|
181
|
+
stream = @ohash.deref_stream(obj[:ToUnicode])
|
182
|
+
if stream
|
135
183
|
@tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
|
136
184
|
end
|
137
185
|
end
|
138
186
|
end
|
139
187
|
|
188
|
+
def extract_type3_info(obj)
|
189
|
+
if @subtype == :Type3
|
190
|
+
@font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
|
191
|
+
0.001, 0, 0, 0.001, 0, 0
|
192
|
+
]
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
140
196
|
def extract_descriptor(obj)
|
141
197
|
if obj[:FontDescriptor]
|
142
198
|
# create a font descriptor object if we can, in other words, unless this is
|
143
199
|
# a CID Font
|
144
|
-
fd = @ohash.
|
200
|
+
fd = @ohash.deref_hash(obj[:FontDescriptor])
|
145
201
|
@font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
|
146
202
|
else
|
147
203
|
@font_descriptor = nil
|
@@ -153,9 +209,9 @@ class PDF::Reader
|
|
153
209
|
# per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
|
154
210
|
# A one-element array specifying the CIDFont dictionary that is the
|
155
211
|
# descendant of this Type 0 font.
|
156
|
-
descendants = @ohash.
|
212
|
+
descendants = @ohash.deref_array(obj[:DescendantFonts])
|
157
213
|
@descendantfonts = descendants.map { |desc|
|
158
|
-
PDF::Reader::Font.new(@ohash, @ohash.
|
214
|
+
PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
|
159
215
|
}
|
160
216
|
end
|
161
217
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'ttfunk'
|
@@ -14,22 +15,23 @@ class PDF::Reader
|
|
14
15
|
:x_height, :font_flags
|
15
16
|
|
16
17
|
def initialize(ohash, fd_hash)
|
17
|
-
|
18
|
-
@
|
19
|
-
@
|
20
|
-
@
|
21
|
-
@
|
22
|
-
@
|
23
|
-
@
|
24
|
-
@
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
18
|
+
# TODO change these to typed derefs
|
19
|
+
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
|
20
|
+
@descent = ohash.deref_number(fd_hash[:Descent]) || 0
|
21
|
+
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
|
22
|
+
@font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
|
23
|
+
@avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
|
24
|
+
@cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
|
25
|
+
@font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
|
26
|
+
@italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
|
27
|
+
@font_name = ohash.deref_name(fd_hash[:FontName]).to_s
|
28
|
+
@leading = ohash.deref_number(fd_hash[:Leading]) || 0
|
29
|
+
@max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
|
30
|
+
@stem_v = ohash.deref_number(fd_hash[:StemV])
|
31
|
+
@x_height = ohash.deref_number(fd_hash[:XHeight])
|
32
|
+
@font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
|
33
|
+
@font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
|
34
|
+
@font_family = ohash.deref_string(fd_hash[:FontFamily])
|
33
35
|
|
34
36
|
# A FontDescriptor may have an embedded font program in FontFile
|
35
37
|
# (Type 1 Font Program), FontFile2 (TrueType font program), or
|
@@ -39,7 +41,7 @@ class PDF::Reader
|
|
39
41
|
# 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
|
40
42
|
# 3) OpenType: OpenType Font Program
|
41
43
|
# see Section 9.9, PDF 32000-1:2008, pp 288-292
|
42
|
-
@font_program_stream = ohash.
|
44
|
+
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
|
43
45
|
#TODO handle FontFile and FontFile3
|
44
46
|
|
45
47
|
@is_ttf = true if @font_program_stream
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'digest/md5'
|
@@ -14,15 +15,24 @@ module PDF
|
|
14
15
|
# This behaves and looks much like a limited PDF::Reader::Page class.
|
15
16
|
#
|
16
17
|
class FormXObject
|
17
|
-
|
18
|
+
extend Forwardable
|
18
19
|
|
19
20
|
attr_reader :xobject
|
20
21
|
|
22
|
+
def_delegators :resources, :color_spaces
|
23
|
+
def_delegators :resources, :fonts
|
24
|
+
def_delegators :resources, :graphic_states
|
25
|
+
def_delegators :resources, :patterns
|
26
|
+
def_delegators :resources, :procedure_sets
|
27
|
+
def_delegators :resources, :properties
|
28
|
+
def_delegators :resources, :shadings
|
29
|
+
def_delegators :resources, :xobjects
|
30
|
+
|
21
31
|
def initialize(page, xobject, options = {})
|
22
32
|
@page = page
|
23
33
|
@objects = page.objects
|
24
34
|
@cache = options[:cache] || {}
|
25
|
-
@xobject = @objects.
|
35
|
+
@xobject = @objects.deref_stream(xobject)
|
26
36
|
end
|
27
37
|
|
28
38
|
# return a hash of fonts used on this form.
|
@@ -33,9 +43,9 @@ module PDF
|
|
33
43
|
# to most available metrics for each font.
|
34
44
|
#
|
35
45
|
def font_objects
|
36
|
-
raw_fonts = @objects.
|
46
|
+
raw_fonts = @objects.deref_hash(fonts)
|
37
47
|
::Hash[raw_fonts.map { |label, font|
|
38
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
48
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
|
39
49
|
}]
|
40
50
|
end
|
41
51
|
|
@@ -60,7 +70,7 @@ module PDF
|
|
60
70
|
# Returns the resources that accompany this form.
|
61
71
|
#
|
62
72
|
def resources
|
63
|
-
@resources ||= @objects.
|
73
|
+
@resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
|
64
74
|
end
|
65
75
|
|
66
76
|
def callback(receivers, name, params=[])
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -103,19 +104,25 @@ class PDF::Reader
|
|
103
104
|
|
104
105
|
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
105
106
|
# a text file supplied by Adobe at:
|
106
|
-
#
|
107
|
+
# https://github.com/adobe-type-tools/agl-aglfn
|
107
108
|
def load_adobe_glyph_mapping
|
108
109
|
keyed_by_name = {}
|
109
110
|
keyed_by_codepoint = {}
|
110
111
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
112
|
+
paths = [
|
113
|
+
File.dirname(__FILE__) + "/glyphlist.txt",
|
114
|
+
File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
|
115
|
+
]
|
116
|
+
paths.each do |path|
|
117
|
+
File.open(path, "r:BINARY") do |f|
|
118
|
+
f.each do |l|
|
119
|
+
_m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
120
|
+
if name && code
|
121
|
+
cp = "0x#{code}".hex
|
122
|
+
keyed_by_name[name.to_sym] = cp
|
123
|
+
keyed_by_codepoint[cp] ||= []
|
124
|
+
keyed_by_codepoint[cp] << name.to_sym
|
125
|
+
end
|
119
126
|
end
|
120
127
|
end
|
121
128
|
end
|