pdf-reader 2.5.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
data/lib/pdf/reader/encoding.rb +3 -2
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -68,7 +69,7 @@ class PDF::Reader
68
69
  #
69
70
  # [25, :A, :B]
70
71
  def differences=(diff)
71
- raise ArgumentError, "diff must be an array" unless diff.kind_of?(Array)
72
+ PDF::Reader::Error.validate_type(diff, "diff", Array)
72
73
 
73
74
  @differences = {}
74
75
  byte = 0
@@ -208,7 +209,7 @@ class PDF::Reader
208
209
  def load_mapping(file)
209
210
  File.open(file, "r:BINARY") do |f|
210
211
  f.each do |l|
211
- _m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
212
+ _m, single_byte, unicode = *l.match(/\A([0-9A-Za-z]+);([0-9A-F]{4})/)
212
213
  @mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
213
214
  end
214
215
  end
data/lib/pdf/reader/error.rb +19 -3
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -33,19 +34,34 @@ class PDF::Reader
33
34
  def self.str_assert(lvalue, rvalue, chars=nil)
34
35
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
35
36
  lvalue = lvalue[0,chars] if chars
36
- raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
37
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
37
38
  end
38
39
  ################################################################################
39
40
  def self.str_assert_not(lvalue, rvalue, chars=nil)
40
41
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
41
42
  lvalue = lvalue[0,chars] if chars
42
- raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
43
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
43
44
  end
44
45
  ################################################################################
45
46
  def self.assert_equal(lvalue, rvalue)
46
- raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
47
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
47
48
  end
48
49
  ################################################################################
50
+ def self.validate_type(object, name, klass)
51
+ raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
+ end
53
+ ################################################################################
54
+ def self.validate_type_as_malformed(object, name, klass)
55
+ raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
+ end
57
+ ################################################################################
58
+ def self.validate_not_nil(object, name)
59
+ raise ArgumentError, "#{object} must not be nil" if object.nil?
60
+ end
61
+ ################################################################################
62
+ def self.validate_not_nil_as_malformed(object, name)
63
+ raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
+ end
49
65
  end
50
66
 
51
67
  ################################################################################
data/lib/pdf/reader/filter/ascii85.rb +7 -1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ascii85'
@@ -7,6 +8,7 @@ class PDF::Reader
7
8
  module Filter # :nodoc:
8
9
  # implementation of the Ascii85 filter
9
10
  class Ascii85
11
+
10
12
  def initialize(options = {})
11
13
  @options = options
12
14
  end
@@ -17,7 +19,11 @@ class PDF::Reader
17
19
  #
18
20
  def filter(data)
19
21
  data = "<~#{data}" unless data.to_s[0,2] == "<~"
20
- ::Ascii85::decode(data)
22
+ if defined?(::Ascii85Native)
23
+ ::Ascii85Native::decode(data)
24
+ else
25
+ ::Ascii85::decode(data)
26
+ end
21
27
  rescue Exception => e
22
28
  # Oops, there was a problem decoding the stream
23
29
  raise MalformedPDFError,
data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the AsciiHex stream filter
8
9
  class AsciiHex
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -16,9 +18,12 @@ class PDF::Reader
16
18
  def filter(data)
17
19
  data.chop! if data[-1,1] == ">"
18
20
  data = data[1,data.size] if data[0,1] == "<"
21
+
22
+ return "" if data.nil?
23
+
19
24
  data.gsub!(/[^A-Fa-f0-9]/,"")
20
25
  data << "0" if data.size % 2 == 1
21
- data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
26
+ data.scan(/.{2}/).flatten.map { |s| s.hex.chr }.join("")
22
27
  rescue Exception => e
23
28
  # Oops, there was a problem decoding the stream
24
29
  raise MalformedPDFError,
data/lib/pdf/reader/filter/depredict.rb +11 -9
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,8 +7,9 @@ class PDF::Reader
6
7
  # some filter implementations support preprocessing of the data to
7
8
  # improve compression
8
9
  class Depredict
10
+
9
11
  def initialize(options = {})
10
- @options = options || {}
12
+ @options = options
11
13
  end
12
14
 
13
15
  ################################################################################
@@ -34,7 +36,7 @@ class PDF::Reader
34
36
  ################################################################################
35
37
  def tiff_depredict(data)
36
38
  data = data.unpack("C*")
37
- unfiltered = []
39
+ unfiltered = ''
38
40
  bpc = @options[:BitsPerComponent] || 8
39
41
  pixel_bits = bpc * @options[:Colors]
40
42
  pixel_bytes = pixel_bits / 8
@@ -51,11 +53,11 @@ class PDF::Reader
51
53
  left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
52
54
  row_data[index] = (byte + left) % 256
53
55
  end
54
- unfiltered += row_data
56
+ unfiltered += row_data.pack("C*")
55
57
  pos += line_len
56
58
  end
57
59
 
58
- unfiltered.pack("C*")
60
+ unfiltered
59
61
  end
60
62
  ################################################################################
61
63
  def png_depredict(data)
@@ -67,7 +69,7 @@ class PDF::Reader
67
69
  scanline_length = (pixel_bytes * @options[:Columns]) + 1
68
70
  row = 0
69
71
  pixels = []
70
- paeth, pa, pb, pc = nil
72
+ paeth, pa, pb, pc = 0, 0, 0, 0
71
73
  until data.empty? do
72
74
  row_data = data.slice! 0, scanline_length
73
75
  filter = row_data.shift
@@ -94,17 +96,17 @@ class PDF::Reader
94
96
  row_data[index] = (byte + ((left + upper)/2).floor) % 256
95
97
  end
96
98
  when 4 # Paeth
97
- left = upper = upper_left = nil
99
+ left = upper = upper_left = 0
98
100
  row_data.each_with_index do |byte, index|
99
101
  col = index / pixel_bytes
100
102
 
101
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
103
+ left = index < pixel_bytes ? 0 : Integer(row_data[index - pixel_bytes])
102
104
  if row.zero?
103
105
  upper = upper_left = 0
104
106
  else
105
- upper = pixels[row-1][col][index % pixel_bytes]
107
+ upper = Integer(pixels[row-1][col][index % pixel_bytes])
106
108
  upper_left = col.zero? ? 0 :
107
- pixels[row-1][col-1][index % pixel_bytes]
109
+ Integer(pixels[row-1][col-1][index % pixel_bytes])
108
110
  end
109
111
 
110
112
  p = left + upper - upper_left
data/lib/pdf/reader/filter/flate.rb +4 -2
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
 
@@ -8,6 +9,7 @@ class PDF::Reader
8
9
  module Filter # :nodoc:
9
10
  # implementation of the Flate (zlib) stream filter
10
11
  class Flate
12
+
11
13
  ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
14
  ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
13
15
 
@@ -32,7 +34,7 @@ class PDF::Reader
32
34
  def zlib_inflate(data)
33
35
  begin
34
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
- rescue Zlib::DataError
37
+ rescue Zlib::Error
36
38
  # by default, Ruby's Zlib assumes the data it's inflating
37
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
38
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -41,7 +43,7 @@ class PDF::Reader
41
43
 
42
44
  begin
43
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
- rescue StandardError
46
+ rescue Zlib::Error
45
47
  # swallow this one too, so we can try some other fallback options
46
48
  end
47
49
 
data/lib/pdf/reader/filter/lzw.rb +2 -0
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the LZW stream filter
8
9
  class Lzw
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
data/lib/pdf/reader/filter/null.rb +1 -1
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
- #
5
5
  class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
data/lib/pdf/reader/filter/run_length.rb +19 -13
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
6
7
  module Filter # :nodoc:
7
8
  # implementation of the run length stream filter
8
9
  class RunLength
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
20
22
  length = data.getbyte(pos)
21
23
  pos += 1
22
24
 
23
- case
24
- when length == 128
25
- break
26
- when length < 128
27
- # When the length is < 128, we copy the following length+1 bytes
28
- # literally.
29
- out << data[pos, length + 1]
30
- pos += length
31
- else
32
- # When the length is > 128, we copy the next byte (257 - length)
33
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
34
- # "\x00\x00\x00\x00\x00\x00\x00".
35
- out << data[pos, 1] * (257 - length)
25
+ unless length.nil?
26
+ case
27
+ # nothing
28
+ when length == 128
29
+ break
30
+ when length < 128
31
+ # When the length is < 128, we copy the following length+1 bytes
32
+ # literally.
33
+ out << data[pos, length + 1]
34
+ pos += length
35
+ else
36
+ # When the length is > 128, we copy the next byte (257 - length)
37
+ # times; i.e., "\xFA\x00" ([250, 0]) will expand to
38
+ # "\x00\x00\x00\x00\x00\x00\x00".
39
+ previous_byte = data[pos, 1] || ""
40
+ out << previous_byte * (257 - length)
41
+ end
36
42
  end
37
43
 
38
44
  pos += 1
data/lib/pdf/reader/filter.rb +2 -1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -41,7 +42,7 @@ class PDF::Reader
41
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
42
43
  #
43
44
  def self.with(name, options = {})
44
- case name.to_sym
45
+ case name
45
46
  when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
46
47
  when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
47
48
  when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
data/lib/pdf/reader/font.rb +72 -16
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -42,6 +43,7 @@ class PDF::Reader
42
43
  @tounicode = nil
43
44
 
44
45
  extract_base_info(obj)
46
+ extract_type3_info(obj)
45
47
  extract_descriptor(obj)
46
48
  extract_descendants(obj)
47
49
  @width_calc = build_width_calculator
@@ -72,8 +74,44 @@ class PDF::Reader
72
74
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
75
  end
74
76
 
77
+ # In most cases glyph width is converted into text space with a simple divide by 1000.
78
+ #
79
+ # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
+ #
81
+ def glyph_width_in_text_space(code_point)
82
+ glyph_width_in_glyph_space = glyph_width(code_point)
83
+
84
+ if @subtype == :Type3
85
+ x1, y1 = font_matrix_transform(0,0)
86
+ x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
+ (x2 - x1).abs.round(2)
88
+ else
89
+ glyph_width_in_glyph_space / 1000.0
90
+ end
91
+ end
92
+
75
93
  private
76
94
 
95
+ # Only valid for Type3 fonts
96
+ def font_matrix_transform(x, y)
97
+ return x, y if @font_matrix.nil?
98
+
99
+ matrix = TransformationMatrix.new(
100
+ @font_matrix[0], @font_matrix[1],
101
+ @font_matrix[2], @font_matrix[3],
102
+ @font_matrix[4], @font_matrix[5],
103
+ )
104
+
105
+ if x == 0 && y == 0
106
+ [matrix.e, matrix.f]
107
+ else
108
+ [
109
+ (matrix.a * x) + (matrix.c * y) + (matrix.e),
110
+ (matrix.b * x) + (matrix.d * y) + (matrix.f)
111
+ ]
112
+ end
113
+ end
114
+
77
115
  def default_encoding(font_name)
78
116
  case font_name.to_s
79
117
  when "Symbol" then
@@ -111,37 +149,55 @@ class PDF::Reader
111
149
  end
112
150
  end
113
151
 
114
- def extract_base_info(obj)
115
- @subtype = @ohash.object(obj[:Subtype])
116
- @basefont = @ohash.object(obj[:BaseFont])
117
- if @ohash.object(obj[:Encoding])
118
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
119
161
  else
120
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
121
163
  end
122
- @widths = @ohash.object(obj[:Widths]) || []
123
- @first_char = @ohash.object(obj[:FirstChar])
124
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
125
173
 
126
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
127
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
128
- @cid_widths = @ohash.object(obj[:W]) || []
129
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
130
178
 
131
179
  if obj[:ToUnicode]
132
180
  # ToUnicode is optional for Type1 and Type3
133
- stream = @ohash.object(obj[:ToUnicode])
134
- if stream.is_a?(PDF::Reader::Stream)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
135
183
  @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
184
  end
137
185
  end
138
186
  end
139
187
 
188
+ def extract_type3_info(obj)
189
+ if @subtype == :Type3
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
193
+ end
194
+ end
195
+
140
196
  def extract_descriptor(obj)
141
197
  if obj[:FontDescriptor]
142
198
  # create a font descriptor object if we can, in other words, unless this is
143
199
  # a CID Font
144
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
145
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
146
202
  else
147
203
  @font_descriptor = nil
@@ -153,9 +209,9 @@ class PDF::Reader
153
209
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
154
210
  # A one-element array specifying the CIDFont dictionary that is the
155
211
  # descendant of this Type 0 font.
156
- descendants = @ohash.object(obj[:DescendantFonts])
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
157
213
  @descendantfonts = descendants.map { |desc|
158
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
159
215
  }
160
216
  end
161
217
 
data/lib/pdf/reader/font_descriptor.rb +19 -17
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ttfunk'
@@ -14,22 +15,23 @@ class PDF::Reader
14
15
  :x_height, :font_flags
15
16
 
16
17
  def initialize(ohash, fd_hash)
17
- @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
- @descent = ohash.object(fd_hash[:Descent]) || 0
19
- @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
- @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
- @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
- @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
- @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
- @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
- @font_name = ohash.object(fd_hash[:FontName]).to_s
26
- @leading = ohash.object(fd_hash[:Leading]) || 0
27
- @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
- @stem_v = ohash.object(fd_hash[:StemV])
29
- @x_height = ohash.object(fd_hash[:XHeight])
30
- @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
- @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
- @font_family = ohash.object(fd_hash[:FontFamily])
18
+ # TODO change these to typed derefs
19
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
+ @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
+ @stem_v = ohash.deref_number(fd_hash[:StemV])
31
+ @x_height = ohash.deref_number(fd_hash[:XHeight])
32
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
+ @font_family = ohash.deref_string(fd_hash[:FontFamily])
33
35
 
34
36
  # A FontDescriptor may have an embedded font program in FontFile
35
37
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -39,7 +41,7 @@ class PDF::Reader
39
41
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
42
  # 3) OpenType: OpenType Font Program
41
43
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
- @font_program_stream = ohash.object(fd_hash[:FontFile2])
44
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
43
45
  #TODO handle FontFile and FontFile3
44
46
 
45
47
  @is_ttf = true if @font_program_stream
data/lib/pdf/reader/form_xobject.rb +15 -5
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest/md5'
@@ -14,15 +15,24 @@ module PDF
14
15
  # This behaves and looks much like a limited PDF::Reader::Page class.
15
16
  #
16
17
  class FormXObject
17
- include ResourceMethods
18
+ extend Forwardable
18
19
 
19
20
  attr_reader :xobject
20
21
 
22
+ def_delegators :resources, :color_spaces
23
+ def_delegators :resources, :fonts
24
+ def_delegators :resources, :graphic_states
25
+ def_delegators :resources, :patterns
26
+ def_delegators :resources, :procedure_sets
27
+ def_delegators :resources, :properties
28
+ def_delegators :resources, :shadings
29
+ def_delegators :resources, :xobjects
30
+
21
31
  def initialize(page, xobject, options = {})
22
32
  @page = page
23
33
  @objects = page.objects
24
34
  @cache = options[:cache] || {}
25
- @xobject = @objects.deref(xobject)
35
+ @xobject = @objects.deref_stream(xobject)
26
36
  end
27
37
 
28
38
  # return a hash of fonts used on this form.
@@ -33,9 +43,9 @@ module PDF
33
43
  # to most available metrics for each font.
34
44
  #
35
45
  def font_objects
36
- raw_fonts = @objects.deref(resources[:Font] || {})
46
+ raw_fonts = @objects.deref_hash(fonts)
37
47
  ::Hash[raw_fonts.map { |label, font|
38
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
39
49
  }]
40
50
  end
41
51
 
@@ -60,7 +70,7 @@ module PDF
60
70
  # Returns the resources that accompany this form.
61
71
  #
62
72
  def resources
63
- @resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
73
+ @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
64
74
  end
65
75
 
66
76
  def callback(receivers, name, params=[])
data/lib/pdf/reader/glyph_hash.rb +16 -9
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -103,19 +104,25 @@ class PDF::Reader
103
104
 
104
105
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
105
106
  # a text file supplied by Adobe at:
106
- # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
107
+ # https://github.com/adobe-type-tools/agl-aglfn
107
108
  def load_adobe_glyph_mapping
108
109
  keyed_by_name = {}
109
110
  keyed_by_codepoint = {}
110
111
 
111
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
112
- f.each do |l|
113
- _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
- if name && code
115
- cp = "0x#{code}".hex
116
- keyed_by_name[name.to_sym] = cp
117
- keyed_by_codepoint[cp] ||= []
118
- keyed_by_codepoint[cp] << name.to_sym
112
+ paths = [
113
+ File.dirname(__FILE__) + "/glyphlist.txt",
114
+ File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
115
+ ]
116
+ paths.each do |path|
117
+ File.open(path, "r:BINARY") do |f|
118
+ f.each do |l|
119
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
120
+ if name && code
121
+ cp = "0x#{code}".hex
122
+ keyed_by_name[name.to_sym] = cp
123
+ keyed_by_codepoint[cp] ||= []
124
+ keyed_by_codepoint[cp] << name.to_sym
125
+ end
119
126
  end
120
127
  end
121
128
  end