pdf-reader 2.5.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -68,7 +69,7 @@ class PDF::Reader
68
69
  #
69
70
  # [25, :A, :B]
70
71
  def differences=(diff)
71
- raise ArgumentError, "diff must be an array" unless diff.kind_of?(Array)
72
+ PDF::Reader::Error.validate_type(diff, "diff", Array)
72
73
 
73
74
  @differences = {}
74
75
  byte = 0
@@ -208,7 +209,7 @@ class PDF::Reader
208
209
  def load_mapping(file)
209
210
  File.open(file, "r:BINARY") do |f|
210
211
  f.each do |l|
211
- _m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
212
+ _m, single_byte, unicode = *l.match(/\A([0-9A-Za-z]+);([0-9A-F]{4})/)
212
213
  @mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
213
214
  end
214
215
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -33,19 +34,34 @@ class PDF::Reader
33
34
  def self.str_assert(lvalue, rvalue, chars=nil)
34
35
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
35
36
  lvalue = lvalue[0,chars] if chars
36
- raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
37
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
37
38
  end
38
39
  ################################################################################
39
40
  def self.str_assert_not(lvalue, rvalue, chars=nil)
40
41
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
41
42
  lvalue = lvalue[0,chars] if chars
42
- raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
43
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
43
44
  end
44
45
  ################################################################################
45
46
  def self.assert_equal(lvalue, rvalue)
46
- raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
47
+ raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
47
48
  end
48
49
  ################################################################################
50
+ def self.validate_type(object, name, klass)
51
+ raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
+ end
53
+ ################################################################################
54
+ def self.validate_type_as_malformed(object, name, klass)
55
+ raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
+ end
57
+ ################################################################################
58
+ def self.validate_not_nil(object, name)
59
+ raise ArgumentError, "#{object} must not be nil" if object.nil?
60
+ end
61
+ ################################################################################
62
+ def self.validate_not_nil_as_malformed(object, name)
63
+ raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
+ end
49
65
  end
50
66
 
51
67
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ascii85'
@@ -7,6 +8,7 @@ class PDF::Reader
7
8
  module Filter # :nodoc:
8
9
  # implementation of the Ascii85 filter
9
10
  class Ascii85
11
+
10
12
  def initialize(options = {})
11
13
  @options = options
12
14
  end
@@ -17,7 +19,11 @@ class PDF::Reader
17
19
  #
18
20
  def filter(data)
19
21
  data = "<~#{data}" unless data.to_s[0,2] == "<~"
20
- ::Ascii85::decode(data)
22
+ if defined?(::Ascii85Native)
23
+ ::Ascii85Native::decode(data)
24
+ else
25
+ ::Ascii85::decode(data)
26
+ end
21
27
  rescue Exception => e
22
28
  # Oops, there was a problem decoding the stream
23
29
  raise MalformedPDFError,
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the AsciiHex stream filter
8
9
  class AsciiHex
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -16,9 +18,12 @@ class PDF::Reader
16
18
  def filter(data)
17
19
  data.chop! if data[-1,1] == ">"
18
20
  data = data[1,data.size] if data[0,1] == "<"
21
+
22
+ return "" if data.nil?
23
+
19
24
  data.gsub!(/[^A-Fa-f0-9]/,"")
20
25
  data << "0" if data.size % 2 == 1
21
- data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
26
+ data.scan(/.{2}/).flatten.map { |s| s.hex.chr }.join("")
22
27
  rescue Exception => e
23
28
  # Oops, there was a problem decoding the stream
24
29
  raise MalformedPDFError,
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,8 +7,9 @@ class PDF::Reader
6
7
  # some filter implementations support preprocessing of the data to
7
8
  # improve compression
8
9
  class Depredict
10
+
9
11
  def initialize(options = {})
10
- @options = options || {}
12
+ @options = options
11
13
  end
12
14
 
13
15
  ################################################################################
@@ -34,7 +36,7 @@ class PDF::Reader
34
36
  ################################################################################
35
37
  def tiff_depredict(data)
36
38
  data = data.unpack("C*")
37
- unfiltered = []
39
+ unfiltered = ''
38
40
  bpc = @options[:BitsPerComponent] || 8
39
41
  pixel_bits = bpc * @options[:Colors]
40
42
  pixel_bytes = pixel_bits / 8
@@ -51,11 +53,11 @@ class PDF::Reader
51
53
  left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
52
54
  row_data[index] = (byte + left) % 256
53
55
  end
54
- unfiltered += row_data
56
+ unfiltered += row_data.pack("C*")
55
57
  pos += line_len
56
58
  end
57
59
 
58
- unfiltered.pack("C*")
60
+ unfiltered
59
61
  end
60
62
  ################################################################################
61
63
  def png_depredict(data)
@@ -67,7 +69,7 @@ class PDF::Reader
67
69
  scanline_length = (pixel_bytes * @options[:Columns]) + 1
68
70
  row = 0
69
71
  pixels = []
70
- paeth, pa, pb, pc = nil
72
+ paeth, pa, pb, pc = 0, 0, 0, 0
71
73
  until data.empty? do
72
74
  row_data = data.slice! 0, scanline_length
73
75
  filter = row_data.shift
@@ -94,17 +96,17 @@ class PDF::Reader
94
96
  row_data[index] = (byte + ((left + upper)/2).floor) % 256
95
97
  end
96
98
  when 4 # Paeth
97
- left = upper = upper_left = nil
99
+ left = upper = upper_left = 0
98
100
  row_data.each_with_index do |byte, index|
99
101
  col = index / pixel_bytes
100
102
 
101
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
103
+ left = index < pixel_bytes ? 0 : Integer(row_data[index - pixel_bytes])
102
104
  if row.zero?
103
105
  upper = upper_left = 0
104
106
  else
105
- upper = pixels[row-1][col][index % pixel_bytes]
107
+ upper = Integer(pixels[row-1][col][index % pixel_bytes])
106
108
  upper_left = col.zero? ? 0 :
107
- pixels[row-1][col-1][index % pixel_bytes]
109
+ Integer(pixels[row-1][col-1][index % pixel_bytes])
108
110
  end
109
111
 
110
112
  p = left + upper - upper_left
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
 
@@ -8,6 +9,7 @@ class PDF::Reader
8
9
  module Filter # :nodoc:
9
10
  # implementation of the Flate (zlib) stream filter
10
11
  class Flate
12
+
11
13
  ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
14
  ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
13
15
 
@@ -32,7 +34,7 @@ class PDF::Reader
32
34
  def zlib_inflate(data)
33
35
  begin
34
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
- rescue Zlib::DataError
37
+ rescue Zlib::Error
36
38
  # by default, Ruby's Zlib assumes the data it's inflating
37
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
38
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -41,7 +43,7 @@ class PDF::Reader
41
43
 
42
44
  begin
43
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
- rescue StandardError
46
+ rescue Zlib::Error
45
47
  # swallow this one too, so we can try some other fallback options
46
48
  end
47
49
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the LZW stream filter
8
9
  class Lzw
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
- #
5
5
  class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
6
7
  module Filter # :nodoc:
7
8
  # implementation of the run length stream filter
8
9
  class RunLength
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
20
22
  length = data.getbyte(pos)
21
23
  pos += 1
22
24
 
23
- case
24
- when length == 128
25
- break
26
- when length < 128
27
- # When the length is < 128, we copy the following length+1 bytes
28
- # literally.
29
- out << data[pos, length + 1]
30
- pos += length
31
- else
32
- # When the length is > 128, we copy the next byte (257 - length)
33
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
34
- # "\x00\x00\x00\x00\x00\x00\x00".
35
- out << data[pos, 1] * (257 - length)
25
+ unless length.nil?
26
+ case
27
+ # nothing
28
+ when length == 128
29
+ break
30
+ when length < 128
31
+ # When the length is < 128, we copy the following length+1 bytes
32
+ # literally.
33
+ out << data[pos, length + 1]
34
+ pos += length
35
+ else
36
+ # When the length is > 128, we copy the next byte (257 - length)
37
+ # times; i.e., "\xFA\x00" ([250, 0]) will expand to
38
+ # "\x00\x00\x00\x00\x00\x00\x00".
39
+ previous_byte = data[pos, 1] || ""
40
+ out << previous_byte * (257 - length)
41
+ end
36
42
  end
37
43
 
38
44
  pos += 1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -41,7 +42,7 @@ class PDF::Reader
41
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
42
43
  #
43
44
  def self.with(name, options = {})
44
- case name.to_sym
45
+ case name
45
46
  when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
46
47
  when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
47
48
  when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -42,6 +43,7 @@ class PDF::Reader
42
43
  @tounicode = nil
43
44
 
44
45
  extract_base_info(obj)
46
+ extract_type3_info(obj)
45
47
  extract_descriptor(obj)
46
48
  extract_descendants(obj)
47
49
  @width_calc = build_width_calculator
@@ -72,8 +74,44 @@ class PDF::Reader
72
74
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
75
  end
74
76
 
77
+ # In most cases glyph width is converted into text space with a simple divide by 1000.
78
+ #
79
+ # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
+ #
81
+ def glyph_width_in_text_space(code_point)
82
+ glyph_width_in_glyph_space = glyph_width(code_point)
83
+
84
+ if @subtype == :Type3
85
+ x1, y1 = font_matrix_transform(0,0)
86
+ x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
+ (x2 - x1).abs.round(2)
88
+ else
89
+ glyph_width_in_glyph_space / 1000.0
90
+ end
91
+ end
92
+
75
93
  private
76
94
 
95
+ # Only valid for Type3 fonts
96
+ def font_matrix_transform(x, y)
97
+ return x, y if @font_matrix.nil?
98
+
99
+ matrix = TransformationMatrix.new(
100
+ @font_matrix[0], @font_matrix[1],
101
+ @font_matrix[2], @font_matrix[3],
102
+ @font_matrix[4], @font_matrix[5],
103
+ )
104
+
105
+ if x == 0 && y == 0
106
+ [matrix.e, matrix.f]
107
+ else
108
+ [
109
+ (matrix.a * x) + (matrix.c * y) + (matrix.e),
110
+ (matrix.b * x) + (matrix.d * y) + (matrix.f)
111
+ ]
112
+ end
113
+ end
114
+
77
115
  def default_encoding(font_name)
78
116
  case font_name.to_s
79
117
  when "Symbol" then
@@ -111,37 +149,55 @@ class PDF::Reader
111
149
  end
112
150
  end
113
151
 
114
- def extract_base_info(obj)
115
- @subtype = @ohash.object(obj[:Subtype])
116
- @basefont = @ohash.object(obj[:BaseFont])
117
- if @ohash.object(obj[:Encoding])
118
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
119
161
  else
120
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
121
163
  end
122
- @widths = @ohash.object(obj[:Widths]) || []
123
- @first_char = @ohash.object(obj[:FirstChar])
124
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
125
173
 
126
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
127
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
128
- @cid_widths = @ohash.object(obj[:W]) || []
129
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
130
178
 
131
179
  if obj[:ToUnicode]
132
180
  # ToUnicode is optional for Type1 and Type3
133
- stream = @ohash.object(obj[:ToUnicode])
134
- if stream.is_a?(PDF::Reader::Stream)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
135
183
  @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
184
  end
137
185
  end
138
186
  end
139
187
 
188
+ def extract_type3_info(obj)
189
+ if @subtype == :Type3
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
193
+ end
194
+ end
195
+
140
196
  def extract_descriptor(obj)
141
197
  if obj[:FontDescriptor]
142
198
  # create a font descriptor object if we can, in other words, unless this is
143
199
  # a CID Font
144
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
145
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
146
202
  else
147
203
  @font_descriptor = nil
@@ -153,9 +209,9 @@ class PDF::Reader
153
209
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
154
210
  # A one-element array specifying the CIDFont dictionary that is the
155
211
  # descendant of this Type 0 font.
156
- descendants = @ohash.object(obj[:DescendantFonts])
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
157
213
  @descendantfonts = descendants.map { |desc|
158
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
159
215
  }
160
216
  end
161
217
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ttfunk'
@@ -14,22 +15,23 @@ class PDF::Reader
14
15
  :x_height, :font_flags
15
16
 
16
17
  def initialize(ohash, fd_hash)
17
- @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
- @descent = ohash.object(fd_hash[:Descent]) || 0
19
- @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
- @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
- @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
- @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
- @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
- @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
- @font_name = ohash.object(fd_hash[:FontName]).to_s
26
- @leading = ohash.object(fd_hash[:Leading]) || 0
27
- @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
- @stem_v = ohash.object(fd_hash[:StemV])
29
- @x_height = ohash.object(fd_hash[:XHeight])
30
- @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
- @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
- @font_family = ohash.object(fd_hash[:FontFamily])
18
+ # TODO change these to typed derefs
19
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
+ @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
+ @stem_v = ohash.deref_number(fd_hash[:StemV])
31
+ @x_height = ohash.deref_number(fd_hash[:XHeight])
32
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
+ @font_family = ohash.deref_string(fd_hash[:FontFamily])
33
35
 
34
36
  # A FontDescriptor may have an embedded font program in FontFile
35
37
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -39,7 +41,7 @@ class PDF::Reader
39
41
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
42
  # 3) OpenType: OpenType Font Program
41
43
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
- @font_program_stream = ohash.object(fd_hash[:FontFile2])
44
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
43
45
  #TODO handle FontFile and FontFile3
44
46
 
45
47
  @is_ttf = true if @font_program_stream
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest/md5'
@@ -14,15 +15,24 @@ module PDF
14
15
  # This behaves and looks much like a limited PDF::Reader::Page class.
15
16
  #
16
17
  class FormXObject
17
- include ResourceMethods
18
+ extend Forwardable
18
19
 
19
20
  attr_reader :xobject
20
21
 
22
+ def_delegators :resources, :color_spaces
23
+ def_delegators :resources, :fonts
24
+ def_delegators :resources, :graphic_states
25
+ def_delegators :resources, :patterns
26
+ def_delegators :resources, :procedure_sets
27
+ def_delegators :resources, :properties
28
+ def_delegators :resources, :shadings
29
+ def_delegators :resources, :xobjects
30
+
21
31
  def initialize(page, xobject, options = {})
22
32
  @page = page
23
33
  @objects = page.objects
24
34
  @cache = options[:cache] || {}
25
- @xobject = @objects.deref(xobject)
35
+ @xobject = @objects.deref_stream(xobject)
26
36
  end
27
37
 
28
38
  # return a hash of fonts used on this form.
@@ -33,9 +43,9 @@ module PDF
33
43
  # to most available metrics for each font.
34
44
  #
35
45
  def font_objects
36
- raw_fonts = @objects.deref(resources[:Font] || {})
46
+ raw_fonts = @objects.deref_hash(fonts)
37
47
  ::Hash[raw_fonts.map { |label, font|
38
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
39
49
  }]
40
50
  end
41
51
 
@@ -60,7 +70,7 @@ module PDF
60
70
  # Returns the resources that accompany this form.
61
71
  #
62
72
  def resources
63
- @resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
73
+ @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
64
74
  end
65
75
 
66
76
  def callback(receivers, name, params=[])
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -103,19 +104,25 @@ class PDF::Reader
103
104
 
104
105
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
105
106
  # a text file supplied by Adobe at:
106
- # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
107
+ # https://github.com/adobe-type-tools/agl-aglfn
107
108
  def load_adobe_glyph_mapping
108
109
  keyed_by_name = {}
109
110
  keyed_by_codepoint = {}
110
111
 
111
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
112
- f.each do |l|
113
- _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
- if name && code
115
- cp = "0x#{code}".hex
116
- keyed_by_name[name.to_sym] = cp
117
- keyed_by_codepoint[cp] ||= []
118
- keyed_by_codepoint[cp] << name.to_sym
112
+ paths = [
113
+ File.dirname(__FILE__) + "/glyphlist.txt",
114
+ File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
115
+ ]
116
+ paths.each do |path|
117
+ File.open(path, "r:BINARY") do |f|
118
+ f.each do |l|
119
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
120
+ if name && code
121
+ cp = "0x#{code}".hex
122
+ keyed_by_name[name.to_sym] = cp
123
+ keyed_by_codepoint[cp] ||= []
124
+ keyed_by_codepoint[cp] << name.to_sym
125
+ end
119
126
  end
120
127
  end
121
128
  end