pdf-reader 2.2.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,8 +7,9 @@ class PDF::Reader
6
7
  # some filter implementations support preprocessing of the data to
7
8
  # improve compression
8
9
  class Depredict
10
+
9
11
  def initialize(options = {})
10
- @options = options || {}
12
+ @options = options
11
13
  end
12
14
 
13
15
  ################################################################################
@@ -34,7 +36,7 @@ class PDF::Reader
34
36
  ################################################################################
35
37
  def tiff_depredict(data)
36
38
  data = data.unpack("C*")
37
- unfiltered = []
39
+ unfiltered = ''
38
40
  bpc = @options[:BitsPerComponent] || 8
39
41
  pixel_bits = bpc * @options[:Colors]
40
42
  pixel_bytes = pixel_bits / 8
@@ -51,11 +53,11 @@ class PDF::Reader
51
53
  left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
52
54
  row_data[index] = (byte + left) % 256
53
55
  end
54
- unfiltered += row_data
56
+ unfiltered += row_data.pack("C*")
55
57
  pos += line_len
56
58
  end
57
59
 
58
- unfiltered.pack("C*")
60
+ unfiltered
59
61
  end
60
62
  ################################################################################
61
63
  def png_depredict(data)
@@ -67,7 +69,7 @@ class PDF::Reader
67
69
  scanline_length = (pixel_bytes * @options[:Columns]) + 1
68
70
  row = 0
69
71
  pixels = []
70
- paeth, pa, pb, pc = nil
72
+ paeth, pa, pb, pc = 0, 0, 0, 0
71
73
  until data.empty? do
72
74
  row_data = data.slice! 0, scanline_length
73
75
  filter = row_data.shift
@@ -94,17 +96,17 @@ class PDF::Reader
94
96
  row_data[index] = (byte + ((left + upper)/2).floor) % 256
95
97
  end
96
98
  when 4 # Paeth
97
- left = upper = upper_left = nil
99
+ left = upper = upper_left = 0
98
100
  row_data.each_with_index do |byte, index|
99
101
  col = index / pixel_bytes
100
102
 
101
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
103
+ left = index < pixel_bytes ? 0 : Integer(row_data[index - pixel_bytes])
102
104
  if row.zero?
103
105
  upper = upper_left = 0
104
106
  else
105
- upper = pixels[row-1][col][index % pixel_bytes]
107
+ upper = Integer(pixels[row-1][col][index % pixel_bytes])
106
108
  upper_left = col.zero? ? 0 :
107
- pixels[row-1][col-1][index % pixel_bytes]
109
+ Integer(pixels[row-1][col-1][index % pixel_bytes])
108
110
  end
109
111
 
110
112
  p = left + upper - upper_left
@@ -123,7 +125,7 @@ class PDF::Reader
123
125
  row_data[index] = (byte + paeth) % 256
124
126
  end
125
127
  else
126
- raise ArgumentError, "Invalid filter algorithm #{filter}"
128
+ raise MalformedPDFError, "Invalid filter algorithm #{filter}"
127
129
  end
128
130
 
129
131
  s = []
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
 
@@ -8,6 +9,10 @@ class PDF::Reader
8
9
  module Filter # :nodoc:
9
10
  # implementation of the Flate (zlib) stream filter
10
11
  class Flate
12
+
13
+ ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
14
+ ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
15
+
11
16
  def initialize(options = {})
12
17
  @options = options
13
18
  end
@@ -15,25 +20,34 @@ class PDF::Reader
15
20
  ################################################################################
16
21
  # Decode the specified data with the Zlib compression algorithm
17
22
  def filter(data)
18
- deflated = nil
23
+ deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
24
+
25
+ if deflated.nil?
26
+ raise MalformedPDFError,
27
+ "Error while inflating a compressed stream (no suitable inflation algorithm found)"
28
+ end
29
+ Depredict.new(@options).filter(deflated)
30
+ end
31
+
32
+ private
33
+
34
+ def zlib_inflate(data)
19
35
  begin
20
- deflated = Zlib::Inflate.new.inflate(data)
21
- rescue Zlib::DataError => e
36
+ return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
37
+ rescue Zlib::Error
22
38
  # by default, Ruby's Zlib assumes the data it's inflating
23
- # is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
24
- # If that fails, then use an undocumented 'feature' to attempt to inflate
25
- # the data as a raw RFC1951 stream.
26
- #
27
- # See
28
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
29
- # - http://www.gzip.org/zlib/zlib_faq.html#faq38
30
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
39
+ # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
40
+ # fails, swallow the exception and attempt to inflate the data as a raw
41
+ # RFC1951 stream.
31
42
  end
32
- Depredict.new(@options).filter(deflated)
33
- rescue Exception => e
34
- # Oops, there was a problem inflating the stream
35
- raise MalformedPDFError,
36
- "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
43
+
44
+ begin
45
+ return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
46
+ rescue Zlib::Error
47
+ # swallow this one too, so we can try some other fallback options
48
+ end
49
+
50
+ nil
37
51
  end
38
52
  end
39
53
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the LZW stream filter
8
9
  class Lzw
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
- #
5
5
  class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
6
7
  module Filter # :nodoc:
7
8
  # implementation of the run length stream filter
8
9
  class RunLength
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
20
22
  length = data.getbyte(pos)
21
23
  pos += 1
22
24
 
23
- case
24
- when length == 128
25
- break
26
- when length < 128
27
- # When the length is < 128, we copy the following length+1 bytes
28
- # literally.
29
- out << data[pos, length + 1]
30
- pos += length
31
- else
32
- # When the length is > 128, we copy the next byte (257 - length)
33
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
34
- # "\x00\x00\x00\x00\x00\x00\x00".
35
- out << data[pos, 1] * (257 - length)
25
+ unless length.nil?
26
+ case
27
+ # nothing
28
+ when length == 128
29
+ break
30
+ when length < 128
31
+ # When the length is < 128, we copy the following length+1 bytes
32
+ # literally.
33
+ out << data[pos, length + 1]
34
+ pos += length
35
+ else
36
+ # When the length is > 128, we copy the next byte (257 - length)
37
+ # times; i.e., "\xFA\x00" ([250, 0]) will expand to
38
+ # "\x00\x00\x00\x00\x00\x00\x00".
39
+ previous_byte = data[pos, 1] || ""
40
+ out << previous_byte * (257 - length)
41
+ end
36
42
  end
37
43
 
38
44
  pos += 1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -41,17 +42,16 @@ class PDF::Reader
41
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
42
43
  #
43
44
  def self.with(name, options = {})
44
- case name.to_sym
45
- when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
46
- when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
47
- when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
48
- when :DCTDecode then PDF::Reader::Filter::Null.new(options)
49
- when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
50
- when :Fl then PDF::Reader::Filter::Flate.new(options)
51
- when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
- when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
- when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
54
- when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
45
+ case name
46
+ when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
47
+ when :ASCIIHexDecode, :AHx then PDF::Reader::Filter::AsciiHex.new(options)
48
+ when :CCITTFaxDecode, :CCF then PDF::Reader::Filter::Null.new(options)
49
+ when :DCTDecode, :DCT then PDF::Reader::Filter::Null.new(options)
50
+ when :FlateDecode, :Fl then PDF::Reader::Filter::Flate.new(options)
51
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
+ when :LZWDecode, :LZW then PDF::Reader::Filter::Lzw.new(options)
54
+ when :RunLengthDecode, :RL then PDF::Reader::Filter::RunLength.new(options)
55
55
  else
56
56
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
57
57
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -42,6 +43,7 @@ class PDF::Reader
42
43
  @tounicode = nil
43
44
 
44
45
  extract_base_info(obj)
46
+ extract_type3_info(obj)
45
47
  extract_descriptor(obj)
46
48
  extract_descendants(obj)
47
49
  @width_calc = build_width_calculator
@@ -72,8 +74,44 @@ class PDF::Reader
72
74
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
75
  end
74
76
 
77
+ # In most cases glyph width is converted into text space with a simple divide by 1000.
78
+ #
79
+ # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
+ #
81
+ def glyph_width_in_text_space(code_point)
82
+ glyph_width_in_glyph_space = glyph_width(code_point)
83
+
84
+ if @subtype == :Type3
85
+ x1, y1 = font_matrix_transform(0,0)
86
+ x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
+ (x2 - x1).abs.round(2)
88
+ else
89
+ glyph_width_in_glyph_space / 1000.0
90
+ end
91
+ end
92
+
75
93
  private
76
94
 
95
+ # Only valid for Type3 fonts
96
+ def font_matrix_transform(x, y)
97
+ return x, y if @font_matrix.nil?
98
+
99
+ matrix = TransformationMatrix.new(
100
+ @font_matrix[0], @font_matrix[1],
101
+ @font_matrix[2], @font_matrix[3],
102
+ @font_matrix[4], @font_matrix[5],
103
+ )
104
+
105
+ if x == 0 && y == 0
106
+ [matrix.e, matrix.f]
107
+ else
108
+ [
109
+ (matrix.a * x) + (matrix.c * y) + (matrix.e),
110
+ (matrix.b * x) + (matrix.d * y) + (matrix.f)
111
+ ]
112
+ end
113
+ end
114
+
77
115
  def default_encoding(font_name)
78
116
  case font_name.to_s
79
117
  when "Symbol" then
@@ -97,7 +135,13 @@ class PDF::Reader
97
135
  elsif @subtype == :Type3
98
136
  PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
99
137
  elsif @subtype == :TrueType
100
- PDF::Reader::WidthCalculator::TrueType.new(self)
138
+ if @font_descriptor
139
+ PDF::Reader::WidthCalculator::TrueType.new(self)
140
+ else
141
+ # A TrueType font that isn't embedded. Most readers look for a version on the
142
+ # local system and fall back to a substitute. For now, we go straight to a substitute
143
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
144
+ end
101
145
  elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
102
146
  PDF::Reader::WidthCalculator::Composite.new(self)
103
147
  else
@@ -105,27 +149,47 @@ class PDF::Reader
105
149
  end
106
150
  end
107
151
 
108
- def extract_base_info(obj)
109
- @subtype = @ohash.object(obj[:Subtype])
110
- @basefont = @ohash.object(obj[:BaseFont])
111
- if @ohash.object(obj[:Encoding])
112
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
113
161
  else
114
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
115
163
  end
116
- @widths = @ohash.object(obj[:Widths]) || []
117
- @first_char = @ohash.object(obj[:FirstChar])
118
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
119
173
 
120
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
121
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
122
- @cid_widths = @ohash.object(obj[:W]) || []
123
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
124
178
 
125
179
  if obj[:ToUnicode]
126
180
  # ToUnicode is optional for Type1 and Type3
127
- stream = @ohash.object(obj[:ToUnicode])
128
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
183
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
184
+ end
185
+ end
186
+ end
187
+
188
+ def extract_type3_info(obj)
189
+ if @subtype == :Type3
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
129
193
  end
130
194
  end
131
195
 
@@ -133,7 +197,7 @@ class PDF::Reader
133
197
  if obj[:FontDescriptor]
134
198
  # create a font descriptor object if we can, in other words, unless this is
135
199
  # a CID Font
136
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
137
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
138
202
  else
139
203
  @font_descriptor = nil
@@ -141,14 +205,17 @@ class PDF::Reader
141
205
  end
142
206
 
143
207
  def extract_descendants(obj)
144
- return unless obj[:DescendantFonts]
145
208
  # per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
146
209
  # A one-element array specifying the CIDFont dictionary that is the
147
210
  # descendant of this Type 0 font.
148
- descendants = @ohash.object(obj[:DescendantFonts])
149
- @descendantfonts = descendants.map { |desc|
150
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
151
- }
211
+ if obj[:DescendantFonts]
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
213
+ @descendantfonts = descendants.map { |desc|
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
+ }
216
+ else
217
+ @descendantfonts = []
218
+ end
152
219
  end
153
220
 
154
221
  def to_utf8_via_cmap(params)
@@ -162,9 +229,7 @@ class PDF::Reader
162
229
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
163
230
  }.flatten.pack("U*")
164
231
  when Array
165
- params.collect { |param| to_utf8_via_cmap(param) }
166
- else
167
- params
232
+ params.collect { |param| to_utf8_via_cmap(param) }.join("")
168
233
  end
169
234
  end
170
235
 
@@ -179,9 +244,7 @@ class PDF::Reader
179
244
  when String
180
245
  encoding.to_utf8(params)
181
246
  when Array
182
- params.collect { |param| to_utf8_via_encoding(param) }
183
- else
184
- params
247
+ params.collect { |param| to_utf8_via_encoding(param) }.join("")
185
248
  end
186
249
  end
187
250
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ttfunk'
@@ -14,22 +15,23 @@ class PDF::Reader
14
15
  :x_height, :font_flags
15
16
 
16
17
  def initialize(ohash, fd_hash)
17
- @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
- @descent = ohash.object(fd_hash[:Descent]) || 0
19
- @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
- @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
- @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
- @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
- @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
- @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
- @font_name = ohash.object(fd_hash[:FontName]).to_s
26
- @leading = ohash.object(fd_hash[:Leading]) || 0
27
- @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
- @stem_v = ohash.object(fd_hash[:StemV])
29
- @x_height = ohash.object(fd_hash[:XHeight])
30
- @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
- @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
- @font_family = ohash.object(fd_hash[:FontFamily])
18
+ # TODO change these to typed derefs
19
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
+ @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
+ @stem_v = ohash.deref_number(fd_hash[:StemV])
31
+ @x_height = ohash.deref_number(fd_hash[:XHeight])
32
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
+ @font_family = ohash.deref_string(fd_hash[:FontFamily])
33
35
 
34
36
  # A FontDescriptor may have an embedded font program in FontFile
35
37
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -39,7 +41,7 @@ class PDF::Reader
39
41
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
42
  # 3) OpenType: OpenType Font Program
41
43
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
- @font_program_stream = ohash.object(fd_hash[:FontFile2])
44
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
43
45
  #TODO handle FontFile and FontFile3
44
46
 
45
47
  @is_ttf = true if @font_program_stream
@@ -54,7 +56,9 @@ class PDF::Reader
54
56
  end
55
57
  char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
56
58
  if char_metric
57
- return char_metric.advance_width
59
+ char_metric.advance_width
60
+ else
61
+ 0
58
62
  end
59
63
  end
60
64
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest/md5'
@@ -14,15 +15,24 @@ module PDF
14
15
  # This behaves and looks much like a limited PDF::Reader::Page class.
15
16
  #
16
17
  class FormXObject
17
- include ResourceMethods
18
+ extend Forwardable
18
19
 
19
20
  attr_reader :xobject
20
21
 
22
+ def_delegators :resources, :color_spaces
23
+ def_delegators :resources, :fonts
24
+ def_delegators :resources, :graphic_states
25
+ def_delegators :resources, :patterns
26
+ def_delegators :resources, :procedure_sets
27
+ def_delegators :resources, :properties
28
+ def_delegators :resources, :shadings
29
+ def_delegators :resources, :xobjects
30
+
21
31
  def initialize(page, xobject, options = {})
22
32
  @page = page
23
33
  @objects = page.objects
24
34
  @cache = options[:cache] || {}
25
- @xobject = @objects.deref(xobject)
35
+ @xobject = @objects.deref_stream(xobject)
26
36
  end
27
37
 
28
38
  # return a hash of fonts used on this form.
@@ -33,9 +43,9 @@ module PDF
33
43
  # to most available metrics for each font.
34
44
  #
35
45
  def font_objects
36
- raw_fonts = @objects.deref(resources[:Font] || {})
46
+ raw_fonts = @objects.deref_hash(fonts)
37
47
  ::Hash[raw_fonts.map { |label, font|
38
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
39
49
  }]
40
50
  end
41
51
 
@@ -45,6 +55,9 @@ module PDF
45
55
  # See the comments on PDF::Reader::Page#walk for more detail.
46
56
  #
47
57
  def walk(*receivers)
58
+ receivers = receivers.map { |receiver|
59
+ ValidatingReceiver.new(receiver)
60
+ }
48
61
  content_stream(receivers, raw_content)
49
62
  end
50
63
 
@@ -60,7 +73,7 @@ module PDF
60
73
  # Returns the resources that accompany this form.
61
74
  #
62
75
  def resources
63
- @resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
76
+ @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
64
77
  end
65
78
 
66
79
  def callback(receivers, name, params=[])
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -32,10 +33,18 @@ class PDF::Reader
32
33
  #
33
34
  class GlyphHash # :nodoc:
34
35
  def initialize
36
+ @@by_codepoint_cache ||= nil
37
+ @@by_name_cache ||= nil
38
+
35
39
  # only parse the glyph list once, and cache the results (for performance)
36
- adobe = @@cache ||= load_adobe_glyph_mapping
37
- @by_name = adobe.first
38
- @by_codepoint = adobe.last
40
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
41
+ @by_name = @@by_name_cache
42
+ @by_codepoint = @@by_codepoint_cache
43
+ else
44
+ by_name, by_codepoint = load_adobe_glyph_mapping
45
+ @by_name = @@by_name_cache ||= by_name
46
+ @by_codepoint = @@by_codepoint_cache ||= by_codepoint
47
+ end
39
48
  end
40
49
 
41
50
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -103,24 +112,30 @@ class PDF::Reader
103
112
 
104
113
  # returns a hash that maps glyph names to unicode codepoints. The mapping is based on
105
114
  # a text file supplied by Adobe at:
106
- # http://www.adobe.com/devnet/opentype/archives/glyphlist.txt
115
+ # https://github.com/adobe-type-tools/agl-aglfn
107
116
  def load_adobe_glyph_mapping
108
117
  keyed_by_name = {}
109
118
  keyed_by_codepoint = {}
110
119
 
111
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
112
- f.each do |l|
113
- _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
114
- if name && code
115
- cp = "0x#{code}".hex
116
- keyed_by_name[name.to_sym] = cp
117
- keyed_by_codepoint[cp] ||= []
118
- keyed_by_codepoint[cp] << name.to_sym
120
+ paths = [
121
+ File.dirname(__FILE__) + "/glyphlist.txt",
122
+ File.dirname(__FILE__) + "/glyphlist-zapfdingbats.txt",
123
+ ]
124
+ paths.each do |path|
125
+ File.open(path, "r:BINARY") do |f|
126
+ f.each do |l|
127
+ _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
128
+ if name && code
129
+ cp = "0x#{code}".hex
130
+ keyed_by_name[name.to_sym] = cp
131
+ keyed_by_codepoint[cp] ||= []
132
+ keyed_by_codepoint[cp] << name.to_sym
133
+ end
119
134
  end
120
135
  end
121
136
  end
122
137
 
123
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
138
+ return keyed_by_name.freeze, keyed_by_codepoint.freeze
124
139
  end
125
140
 
126
141
  end