pdf-reader 2.2.0 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,8 +7,9 @@ class PDF::Reader
6
7
  # some filter implementations support preprocessing of the data to
7
8
  # improve compression
8
9
  class Depredict
10
+
9
11
  def initialize(options = {})
10
- @options = options || {}
12
+ @options = options
11
13
  end
12
14
 
13
15
  ################################################################################
@@ -34,7 +36,7 @@ class PDF::Reader
34
36
  ################################################################################
35
37
  def tiff_depredict(data)
36
38
  data = data.unpack("C*")
37
- unfiltered = []
39
+ unfiltered = ''
38
40
  bpc = @options[:BitsPerComponent] || 8
39
41
  pixel_bits = bpc * @options[:Colors]
40
42
  pixel_bytes = pixel_bits / 8
@@ -51,11 +53,11 @@ class PDF::Reader
51
53
  left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
52
54
  row_data[index] = (byte + left) % 256
53
55
  end
54
- unfiltered += row_data
56
+ unfiltered += row_data.pack("C*")
55
57
  pos += line_len
56
58
  end
57
59
 
58
- unfiltered.pack("C*")
60
+ unfiltered
59
61
  end
60
62
  ################################################################################
61
63
  def png_depredict(data)
@@ -67,7 +69,7 @@ class PDF::Reader
67
69
  scanline_length = (pixel_bytes * @options[:Columns]) + 1
68
70
  row = 0
69
71
  pixels = []
70
- paeth, pa, pb, pc = nil
72
+ paeth, pa, pb, pc = 0, 0, 0, 0
71
73
  until data.empty? do
72
74
  row_data = data.slice! 0, scanline_length
73
75
  filter = row_data.shift
@@ -94,17 +96,17 @@ class PDF::Reader
94
96
  row_data[index] = (byte + ((left + upper)/2).floor) % 256
95
97
  end
96
98
  when 4 # Paeth
97
- left = upper = upper_left = nil
99
+ left = upper = upper_left = 0
98
100
  row_data.each_with_index do |byte, index|
99
101
  col = index / pixel_bytes
100
102
 
101
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
103
+ left = index < pixel_bytes ? 0 : Integer(row_data[index - pixel_bytes])
102
104
  if row.zero?
103
105
  upper = upper_left = 0
104
106
  else
105
- upper = pixels[row-1][col][index % pixel_bytes]
107
+ upper = Integer(pixels[row-1][col][index % pixel_bytes])
106
108
  upper_left = col.zero? ? 0 :
107
- pixels[row-1][col-1][index % pixel_bytes]
109
+ Integer(pixels[row-1][col-1][index % pixel_bytes])
108
110
  end
109
111
 
110
112
  p = left + upper - upper_left
@@ -123,7 +125,7 @@ class PDF::Reader
123
125
  row_data[index] = (byte + paeth) % 256
124
126
  end
125
127
  else
126
- raise ArgumentError, "Invalid filter algorithm #{filter}"
128
+ raise MalformedPDFError, "Invalid filter algorithm #{filter}"
127
129
  end
128
130
 
129
131
  s = []
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
 
@@ -8,6 +9,10 @@ class PDF::Reader
8
9
  module Filter # :nodoc:
9
10
  # implementation of the Flate (zlib) stream filter
10
11
  class Flate
12
+
13
+ ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
14
+ ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
15
+
11
16
  def initialize(options = {})
12
17
  @options = options
13
18
  end
@@ -15,25 +20,34 @@ class PDF::Reader
15
20
  ################################################################################
16
21
  # Decode the specified data with the Zlib compression algorithm
17
22
  def filter(data)
18
- deflated = nil
23
+ deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
24
+
25
+ if deflated.nil?
26
+ raise MalformedPDFError,
27
+ "Error while inflating a compressed stream (no suitable inflation algorithm found)"
28
+ end
29
+ Depredict.new(@options).filter(deflated)
30
+ end
31
+
32
+ private
33
+
34
+ def zlib_inflate(data)
19
35
  begin
20
- deflated = Zlib::Inflate.new.inflate(data)
21
- rescue Zlib::DataError => e
36
+ return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
37
+ rescue Zlib::Error
22
38
  # by default, Ruby's Zlib assumes the data it's inflating
23
- # is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
24
- # If that fails, then use an undocumented 'feature' to attempt to inflate
25
- # the data as a raw RFC1951 stream.
26
- #
27
- # See
28
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
29
- # - http://www.gzip.org/zlib/zlib_faq.html#faq38
30
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
39
+ # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
40
+ # fails, swallow the exception and attempt to inflate the data as a raw
41
+ # RFC1951 stream.
31
42
  end
32
- Depredict.new(@options).filter(deflated)
33
- rescue Exception => e
34
- # Oops, there was a problem inflating the stream
35
- raise MalformedPDFError,
36
- "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
43
+
44
+ begin
45
+ return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
46
+ rescue Zlib::Error
47
+ # swallow this one too, so we can try some other fallback options
48
+ end
49
+
50
+ nil
37
51
  end
38
52
  end
39
53
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the LZW stream filter
8
9
  class Lzw
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
- #
5
5
  class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
6
7
  module Filter # :nodoc:
7
8
  # implementation of the run length stream filter
8
9
  class RunLength
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
20
22
  length = data.getbyte(pos)
21
23
  pos += 1
22
24
 
23
- case
24
- when length == 128
25
- break
26
- when length < 128
27
- # When the length is < 128, we copy the following length+1 bytes
28
- # literally.
29
- out << data[pos, length + 1]
30
- pos += length
31
- else
32
- # When the length is > 128, we copy the next byte (257 - length)
33
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
34
- # "\x00\x00\x00\x00\x00\x00\x00".
35
- out << data[pos, 1] * (257 - length)
25
+ unless length.nil?
26
+ case
27
+ # nothing
28
+ when length == 128
29
+ break
30
+ when length < 128
31
+ # When the length is < 128, we copy the following length+1 bytes
32
+ # literally.
33
+ out << data[pos, length + 1]
34
+ pos += length
35
+ else
36
+ # When the length is > 128, we copy the next byte (257 - length)
37
+ # times; i.e., "\xFA\x00" ([250, 0]) will expand to
38
+ # "\x00\x00\x00\x00\x00\x00\x00".
39
+ previous_byte = data[pos, 1] || ""
40
+ out << previous_byte * (257 - length)
41
+ end
36
42
  end
37
43
 
38
44
  pos += 1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -41,17 +42,16 @@ class PDF::Reader
41
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
42
43
  #
43
44
  def self.with(name, options = {})
44
- case name.to_sym
45
- when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
46
- when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
47
- when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
48
- when :DCTDecode then PDF::Reader::Filter::Null.new(options)
49
- when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
50
- when :Fl then PDF::Reader::Filter::Flate.new(options)
51
- when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
- when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
- when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
54
- when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
45
+ case name
46
+ when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
47
+ when :ASCIIHexDecode, :AHx then PDF::Reader::Filter::AsciiHex.new(options)
48
+ when :CCITTFaxDecode, :CCF then PDF::Reader::Filter::Null.new(options)
49
+ when :DCTDecode, :DCT then PDF::Reader::Filter::Null.new(options)
50
+ when :FlateDecode, :Fl then PDF::Reader::Filter::Flate.new(options)
51
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
+ when :LZWDecode, :LZW then PDF::Reader::Filter::Lzw.new(options)
54
+ when :RunLengthDecode, :RL then PDF::Reader::Filter::RunLength.new(options)
55
55
  else
56
56
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
57
57
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -42,6 +43,7 @@ class PDF::Reader
42
43
  @tounicode = nil
43
44
 
44
45
  extract_base_info(obj)
46
+ extract_type3_info(obj)
45
47
  extract_descriptor(obj)
46
48
  extract_descendants(obj)
47
49
  @width_calc = build_width_calculator
@@ -72,8 +74,44 @@ class PDF::Reader
72
74
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
75
  end
74
76
 
77
+ # In most cases glyph width is converted into text space with a simple divide by 1000.
78
+ #
79
+ # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
+ #
81
+ def glyph_width_in_text_space(code_point)
82
+ glyph_width_in_glyph_space = glyph_width(code_point)
83
+
84
+ if @subtype == :Type3
85
+ x1, y1 = font_matrix_transform(0,0)
86
+ x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
+ (x2 - x1).abs.round(2)
88
+ else
89
+ glyph_width_in_glyph_space / 1000.0
90
+ end
91
+ end
92
+
75
93
  private
76
94
 
95
+ # Only valid for Type3 fonts
96
+ def font_matrix_transform(x, y)
97
+ return x, y if @font_matrix.nil?
98
+
99
+ matrix = TransformationMatrix.new(
100
+ @font_matrix[0], @font_matrix[1],
101
+ @font_matrix[2], @font_matrix[3],
102
+ @font_matrix[4], @font_matrix[5],
103
+ )
104
+
105
+ if x == 0 && y == 0
106
+ [matrix.e, matrix.f]
107
+ else
108
+ [
109
+ (matrix.a * x) + (matrix.c * y) + (matrix.e),
110
+ (matrix.b * x) + (matrix.d * y) + (matrix.f)
111
+ ]
112
+ end
113
+ end
114
+
77
115
  def default_encoding(font_name)
78
116
  case font_name.to_s
79
117
  when "Symbol" then
@@ -97,7 +135,13 @@ class PDF::Reader
97
135
  elsif @subtype == :Type3
98
136
  PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
99
137
  elsif @subtype == :TrueType
100
- PDF::Reader::WidthCalculator::TrueType.new(self)
138
+ if @font_descriptor
139
+ PDF::Reader::WidthCalculator::TrueType.new(self)
140
+ else
141
+ # A TrueType font that isn't embedded. Most readers look for a version on the
142
+ # local system and fallback to a substitute. For now, we go straight to a substitute
143
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
144
+ end
101
145
  elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
102
146
  PDF::Reader::WidthCalculator::Composite.new(self)
103
147
  else
@@ -105,27 +149,47 @@ class PDF::Reader
105
149
  end
106
150
  end
107
151
 
108
- def extract_base_info(obj)
109
- @subtype = @ohash.object(obj[:Subtype])
110
- @basefont = @ohash.object(obj[:BaseFont])
111
- if @ohash.object(obj[:Encoding])
112
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
113
161
  else
114
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
115
163
  end
116
- @widths = @ohash.object(obj[:Widths]) || []
117
- @first_char = @ohash.object(obj[:FirstChar])
118
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
119
173
 
120
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
121
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
122
- @cid_widths = @ohash.object(obj[:W]) || []
123
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
124
178
 
125
179
  if obj[:ToUnicode]
126
180
  # ToUnicode is optional for Type1 and Type3
127
- stream = @ohash.object(obj[:ToUnicode])
128
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
183
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
184
+ end
185
+ end
186
+ end
187
+
188
+ def extract_type3_info(obj)
189
+ if @subtype == :Type3
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
129
193
  end
130
194
  end
131
195
 
@@ -133,7 +197,7 @@ class PDF::Reader
133
197
  if obj[:FontDescriptor]
134
198
  # create a font descriptor object if we can, in other words, unless this is
135
199
  # a CID Font
136
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
137
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
138
202
  else
139
203
  @font_descriptor = nil
@@ -141,14 +205,17 @@ class PDF::Reader
141
205
  end
142
206
 
143
207
  def extract_descendants(obj)
144
- return unless obj[:DescendantFonts]
145
208
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
146
209
  # A one-element array specifying the CIDFont dictionary that is the
147
210
  # descendant of this Type 0 font.
148
- descendants = @ohash.object(obj[:DescendantFonts])
149
- @descendantfonts = descendants.map { |desc|
150
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
151
- }
211
+ if obj[:DescendantFonts]
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
213
+ @descendantfonts = descendants.map { |desc|
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
+ }
216
+ else
217
+ @descendantfonts = []
218
+ end
152
219
  end
153
220
 
154
221
  def to_utf8_via_cmap(params)
@@ -162,9 +229,7 @@ class PDF::Reader
162
229
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
163
230
  }.flatten.pack("U*")
164
231
  when Array
165
- params.collect { |param| to_utf8_via_cmap(param) }
166
- else
167
- params
232
+ params.collect { |param| to_utf8_via_cmap(param) }.join("")
168
233
  end
169
234
  end
170
235
 
@@ -179,9 +244,7 @@ class PDF::Reader
179
244
  when String
180
245
  encoding.to_utf8(params)
181
246
  when Array
182
- params.collect { |param| to_utf8_via_encoding(param) }
183
- else
184
- params
247
+ params.collect { |param| to_utf8_via_encoding(param) }.join("")
185
248
  end
186
249
  end
187
250
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ttfunk'
@@ -14,22 +15,23 @@ class PDF::Reader
14
15
  :x_height, :font_flags
15
16
 
16
17
  def initialize(ohash, fd_hash)
17
- @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
- @descent = ohash.object(fd_hash[:Descent]) || 0
19
- @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
- @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
- @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
- @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
- @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
- @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
- @font_name = ohash.object(fd_hash[:FontName]).to_s
26
- @leading = ohash.object(fd_hash[:Leading]) || 0
27
- @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
- @stem_v = ohash.object(fd_hash[:StemV])
29
- @x_height = ohash.object(fd_hash[:XHeight])
30
- @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
- @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
- @font_family = ohash.object(fd_hash[:FontFamily])
18
+ # TODO change these to typed derefs
19
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
+ @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
+ @stem_v = ohash.deref_number(fd_hash[:StemV])
31
+ @x_height = ohash.deref_number(fd_hash[:XHeight])
32
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
+ @font_family = ohash.deref_string(fd_hash[:FontFamily])
33
35
 
34
36
  # A FontDescriptor may have an embedded font program in FontFile
35
37
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -39,7 +41,7 @@ class PDF::Reader
39
41
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
42
  # 3) OpenType: OpenType Font Program
41
43
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
- @font_program_stream = ohash.object(fd_hash[:FontFile2])
44
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
43
45
  #TODO handle FontFile and FontFile3
44
46
 
45
47
  @is_ttf = true if @font_program_stream
@@ -54,7 +56,9 @@ class PDF::Reader
54
56
  end
55
57
  char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
56
58
  if char_metric
57
- return char_metric.advance_width
59
+ char_metric.advance_width
60
+ else
61
+ 0
58
62
  end
59
63
  end
60
64
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest/md5'
@@ -14,15 +15,24 @@ module PDF
14
15
  # This behaves and looks much like a limited PDF::Reader::Page class.
15
16
  #
16
17
  class FormXObject
17
- include ResourceMethods
18
+ extend Forwardable
18
19
 
19
20
  attr_reader :xobject
20
21
 
22
+ def_delegators :resources, :color_spaces
23
+ def_delegators :resources, :fonts
24
+ def_delegators :resources, :graphic_states
25
+ def_delegators :resources, :patterns
26
+ def_delegators :resources, :procedure_sets
27
+ def_delegators :resources, :properties
28
+ def_delegators :resources, :shadings
29
+ def_delegators :resources, :xobjects
30
+
21
31
  def initialize(page, xobject, options = {})
22
32
  @page = page
23
33
  @objects = page.objects
24
34
  @cache = options[:cache] || {}
25
- @xobject = @objects.deref(xobject)
35
+ @xobject = @objects.deref_stream(xobject)
26
36
  end
27
37
 
28
38
  # return a hash of fonts used on this form.
@@ -33,9 +43,9 @@ module PDF
33
43
  # to most available metrics for each font.
34
44
  #
35
45
  def font_objects
36
- raw_fonts = @objects.deref(resources[:Font] || {})
46
+ raw_fonts = @objects.deref_hash(fonts)
37
47
  ::Hash[raw_fonts.map { |label, font|
38
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
39
49
  }]
40
50
  end
41
51
 
@@ -45,6 +55,9 @@ module PDF
45
55
  # See the comments on PDF::Reader::Page#walk for more detail.
46
56
  #
47
57
  def walk(*receivers)
58
+ receivers = receivers.map { |receiver|
59
+ ValidatingReceiver.new(receiver)
60
+ }
48
61
  content_stream(receivers, raw_content)
49
62
  end
50
63
 
@@ -60,7 +73,7 @@ module PDF
60
73
  # Returns the resources that accompany this form.
61
74
  #
62
75
  def resources
63
- @resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
76
+ @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
64
77
  end
65
78
 
66
79
  def callback(receivers, name, params=[])
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -32,10 +33,18 @@ class PDF::Reader
32
33
  #
33
34
  class GlyphHash # :nodoc:
34
35
  def initialize
36
+ @@by_codepoint_cache ||= nil
37
+ @@by_name_cache ||= nil
38
+
35
39
  # only parse the glyph list once, and cache the results (for performance)
36
- adobe = @@cache ||= load_adobe_glyph_mapping
37
- @by_name = adobe.first
38
- @by_codepoint = adobe.last
40
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
41
+ @by_name = @@by_name_cache
42
+ @by_codepoint = @@by_codepoint_cache
43
+ else
44
+ by_name, by_codepoint = load_adobe_glyph_mapping
45
+ @by_name = @@by_name_cache ||= by_name
46
+ @by_codepoint = @@by_codepoint_cache ||= by_codepoint
47
+ end
39
48
  end
40
49
 
41
50
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -103,24 +112,30 @@ class PDF::Reader
103
112
 
104
113
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
105
114
  # a text file supplied by Adobe at:
106
- # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
115
+ # https://github.com/adobe-type-tools/agl-aglfn
107
116
  def load_adobe_glyph_mapping
108
117
  keyed_by_name = {}
109
118
  keyed_by_codepoint = {}
110
119
 
111
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
112
- f.each do |l|
113
- _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
- if name && code
115
- cp = "0x#{code}".hex
116
- keyed_by_name[name.to_sym] = cp
117
- keyed_by_codepoint[cp] ||= []
118
- keyed_by_codepoint[cp] << name.to_sym
120
+ paths = [
121
+ File.dirname(__FILE__) + "/glyphlist.txt",
122
+ File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
123
+ ]
124
+ paths.each do |path|
125
+ File.open(path, "r:BINARY") do |f|
126
+ f.each do |l|
127
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
128
+ if name && code
129
+ cp = "0x#{code}".hex
130
+ keyed_by_name[name.to_sym] = cp
131
+ keyed_by_codepoint[cp] ||= []
132
+ keyed_by_codepoint[cp] << name.to_sym
133
+ end
119
134
  end
120
135
  end
121
136
  end
122
137
 
123
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
138
+ return keyed_by_name.freeze, keyed_by_codepoint.freeze
124
139
  end
125
140
 
126
141
  end