pdf-reader 1.4.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG +53 -3
  3. data/{README.rdoc → README.md} +40 -23
  4. data/Rakefile +2 -2
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -1
  8. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  11. data/lib/pdf/reader/afm/Courier.afm +342 -342
  12. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  14. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  15. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  16. data/lib/pdf/reader/afm/MustRead.html +19 -0
  17. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  18. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  19. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  20. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  21. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  22. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  23. data/lib/pdf/reader/buffer.rb +14 -12
  24. data/lib/pdf/reader/cid_widths.rb +2 -0
  25. data/lib/pdf/reader/cmap.rb +48 -36
  26. data/lib/pdf/reader/encoding.rb +16 -18
  27. data/lib/pdf/reader/error.rb +5 -0
  28. data/lib/pdf/reader/filter/ascii85.rb +1 -0
  29. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  30. data/lib/pdf/reader/filter/depredict.rb +1 -0
  31. data/lib/pdf/reader/filter/flate.rb +29 -16
  32. data/lib/pdf/reader/filter/lzw.rb +2 -0
  33. data/lib/pdf/reader/filter/null.rb +2 -0
  34. data/lib/pdf/reader/filter/run_length.rb +4 -6
  35. data/lib/pdf/reader/filter.rb +2 -0
  36. data/lib/pdf/reader/font.rb +12 -13
  37. data/lib/pdf/reader/font_descriptor.rb +1 -0
  38. data/lib/pdf/reader/form_xobject.rb +1 -0
  39. data/lib/pdf/reader/glyph_hash.rb +7 -2
  40. data/lib/pdf/reader/lzw.rb +4 -4
  41. data/lib/pdf/reader/null_security_handler.rb +17 -0
  42. data/lib/pdf/reader/object_cache.rb +1 -0
  43. data/lib/pdf/reader/object_hash.rb +91 -37
  44. data/lib/pdf/reader/object_stream.rb +1 -0
  45. data/lib/pdf/reader/orientation_detector.rb +5 -4
  46. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  47. data/lib/pdf/reader/page.rb +30 -1
  48. data/lib/pdf/reader/page_layout.rb +19 -24
  49. data/lib/pdf/reader/page_state.rb +8 -5
  50. data/lib/pdf/reader/page_text_receiver.rb +23 -1
  51. data/lib/pdf/reader/pages_strategy.rb +2 -304
  52. data/lib/pdf/reader/parser.rb +10 -7
  53. data/lib/pdf/reader/print_receiver.rb +1 -0
  54. data/lib/pdf/reader/reference.rb +1 -0
  55. data/lib/pdf/reader/register_receiver.rb +1 -0
  56. data/lib/pdf/reader/resource_methods.rb +1 -0
  57. data/lib/pdf/reader/standard_security_handler.rb +80 -42
  58. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  59. data/lib/pdf/reader/stream.rb +1 -0
  60. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  61. data/lib/pdf/reader/text_run.rb +28 -9
  62. data/lib/pdf/reader/token.rb +1 -0
  63. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  64. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  65. data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
  66. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  67. data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
  68. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  69. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  70. data/lib/pdf/reader/width_calculator.rb +1 -0
  71. data/lib/pdf/reader/xref.rb +11 -5
  72. data/lib/pdf/reader.rb +30 -119
  73. data/lib/pdf-reader.rb +1 -0
  74. metadata +35 -61
  75. data/bin/pdf_list_callbacks +0 -17
  76. data/lib/pdf/hash.rb +0 -19
  77. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  78. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  79. data/lib/pdf/reader/text_receiver.rb +0 -265
@@ -0,0 +1,91 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'digest'
5
+ require 'openssl'
6
+
7
+ class PDF::Reader
8
+
9
+ # class creates interface to encrypt dictionary for use in Decrypt
10
+ class StandardSecurityHandlerV5
11
+
12
+ attr_reader :key_length, :encrypt_key
13
+
14
+ def initialize(opts = {})
15
+ @key_length = 256
16
+ @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
+ @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
+ @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
+ @UE = opts[:UE] # decryption key, encrypted w/ user password
20
+ @encrypt_key = build_standard_key(opts[:password] || '')
21
+ end
22
+
23
+ # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
+ def self.supports?(encrypt)
25
+ return false if encrypt.nil?
26
+
27
+ filter = encrypt.fetch(:Filter, :Standard)
28
+ version = encrypt.fetch(:V, 0)
29
+ revision = encrypt.fetch(:R, 0)
30
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
+ ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
+ end
34
+
35
+ ##7.6.2 General Encryption Algorithm
36
+ #
37
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
+ #
39
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
40
+ #
41
+ # buf - a string to decrypt
42
+ # ref - a PDF::Reader::Reference for the object to decrypt
43
+ #
44
+ def decrypt( buf, ref )
45
+ cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
+ cipher.decrypt
47
+ cipher.key = @encrypt_key.dup
48
+ cipher.iv = buf[0..15]
49
+ cipher.update(buf[16..-1]) + cipher.final
50
+ end
51
+
52
+ private
53
+ # Algorithm 3.2a - Computing an encryption key
54
+ #
55
+ # Defined in PDF 1.7 Extension Level 3
56
+ #
57
+ # if the string is a valid user/owner password, this will return the decryption key
58
+ #
59
+ def auth_owner_pass(password)
60
+ if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
+ cipher.decrypt
63
+ cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
+ cipher.iv = "\x00" * 16
65
+ cipher.padding = 0
66
+ cipher.update(@OE) + cipher.final
67
+ end
68
+ end
69
+
70
+ def auth_user_pass(password)
71
+ if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
+ cipher.decrypt
74
+ cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
+ cipher.iv = "\x00" * 16
76
+ cipher.padding = 0
77
+ cipher.update(@UE) + cipher.final
78
+ end
79
+ end
80
+
81
+ def build_standard_key(pass)
82
+ pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
+
84
+ encrypt_key = auth_owner_pass(pass)
85
+ encrypt_key ||= auth_user_pass(pass)
86
+
87
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
+ encrypt_key
89
+ end
90
+ end
91
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # A value object that represents one or more consecutive characters on a page.
@@ -37,6 +38,10 @@ class PDF::Reader
37
38
  @endx ||= x + width
38
39
  end
39
40
 
41
+ def endy
42
+ @endy ||= y + font_size
43
+ end
44
+
40
45
  def mean_character_width
41
46
  @width / character_count
42
47
  end
@@ -59,22 +64,36 @@ class PDF::Reader
59
64
  "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
60
65
  end
61
66
 
67
+ def intersect?(other_run)
68
+ x <= other_run.endx && endx >= other_run.x &&
69
+ endy >= other_run.y && y <= other_run.endy
70
+ end
71
+
72
+ # return what percentage of this text run is overlapped by another run
73
+ def intersection_area_percent(other_run)
74
+ return 0 unless intersect?(other_run)
75
+
76
+ dx = [endx, other_run.endx].min - [x, other_run.x].max
77
+ dy = [endy, other_run.endy].min - [y, other_run.y].max
78
+ intersection_area = dx*dy
79
+
80
+ intersection_area.to_f / area
81
+ end
82
+
62
83
  private
63
84
 
85
+ def area
86
+ (endx - x) * (endy - y)
87
+ end
88
+
64
89
  def mergable_range
65
90
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
66
91
  end
67
92
 
93
+ # Assume string encoding is marked correctly and we can trust String#size to return a
94
+ # character count
68
95
  def character_count
69
- if @text.size == 1
70
- 1.0
71
- elsif @text.respond_to?(:bytesize)
72
- # M17N aware VM
73
- # so we can trust String#size to return a character count
74
- @text.size.to_f
75
- else
76
- text.unpack("U*").size.to_f
77
- end
96
+ @text.size.to_f
78
97
  end
79
98
  end
80
99
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # co-ordinate systems in PDF files are specified using a 3x3 matrix that looks
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ class PDF::Reader
5
+
6
+ # Security handler for when we don't support the flavour of encryption
7
+ # used in a PDF.
8
+ class UnimplementedSecurityHandler
9
+ def self.supports?(encrypt)
10
+ true
11
+ end
12
+
13
+ def decrypt(buf, ref)
14
+ raise PDF::Reader::EncryptedPDFError, "Unsupported encryption style"
15
+ end
16
+ end
17
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'afm'
4
5
  require 'pdf/reader/synchronized_cache'
@@ -11,11 +12,20 @@ class PDF::Reader
11
12
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
12
13
  class BuiltIn
13
14
 
15
+ BUILTINS = [
16
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
17
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
18
+ :Symbol,
19
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
20
+ :ZapfDingbats
21
+ ]
22
+
14
23
  def initialize(font)
15
24
  @font = font
16
25
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
17
26
 
18
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
27
+ basefont = extract_basefont(font.basefont)
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
19
29
 
20
30
  if File.file?(metrics_path)
21
31
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -27,23 +37,15 @@ class PDF::Reader
27
37
  def glyph_width(code_point)
28
38
  return 0 if code_point.nil? || code_point < 0
29
39
 
30
- m = @metrics.char_metrics_by_code[code_point]
31
- if m.nil?
32
- names = @font.encoding.int_to_name(code_point)
33
-
34
- m = names.map { |name|
35
- @metrics.char_metrics[name.to_s]
36
- }.compact.first
37
- end
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
38
44
 
39
- if m
40
- m[:wx]
41
- elsif @font.widths[code_point - 1]
42
- @font.widths[code_point - 1]
43
- elsif control_character?(code_point)
44
- 0
45
+ if metrics
46
+ metrics[:wx]
45
47
  else
46
- 0
48
+ @font.widths[code_point - 1] || 0
47
49
  end
48
50
  end
49
51
 
@@ -53,6 +55,13 @@ class PDF::Reader
53
55
  @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
54
56
  end
55
57
 
58
+ def extract_basefont(font_name)
59
+ if BUILTINS.include?(font_name)
60
+ font_name
61
+ else
62
+ "Times-Roman"
63
+ end
64
+ end
56
65
  end
57
66
  end
58
67
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -17,8 +18,7 @@ class PDF::Reader
17
18
 
18
19
  def glyph_width(code_point)
19
20
  return 0 if code_point.nil? || code_point < 0
20
-
21
- glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point)
21
+ glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point) || 0
22
22
  end
23
23
 
24
24
  private
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # PDF files may define fonts in a number of ways. Each approach means we must
4
5
  # calculate glyph widths differently, so this set of classes conform to an
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -203,8 +204,10 @@ class PDF::Reader
203
204
  ("\x00" + bytes).unpack("N")[0]
204
205
  elsif bytes.size == 4
205
206
  bytes.unpack("N")[0]
207
+ elsif bytes.size == 8
208
+ bytes.unpack("Q>")[0]
206
209
  else
207
- raise UnsupportedFeatureError, "Unable to unpack xref stream entries with more than 4 bytes"
210
+ raise UnsupportedFeatureError, "Unable to unpack xref stream entries of #{bytes.size} bytes"
208
211
  end
209
212
  end
210
213
  ################################################################################
@@ -227,18 +230,21 @@ class PDF::Reader
227
230
  # should always be 0, but all sort of crazy junk is prefixed to PDF files
228
231
  # in the real world.
229
232
  #
230
- # Checks up to 50 chars into the file, returns nil if no PDF data detected.
233
+ # Checks up to 1024 chars into the file,
234
+ # returns nil if no PDF data detected.
235
+ # Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
236
+ # header appear somewhere within the first 1024 bytes of the file
231
237
  #
232
238
  def calc_junk_offset(io)
233
239
  io.rewind
234
240
  offset = io.pos
235
- until (c = io.readchar) == '%' || c == 37 || offset > 50
241
+ until (c = io.readchar) == '%' || c == 37 || offset > 1024
236
242
  offset += 1
237
243
  end
238
244
  io.rewind
239
- offset < 50 ? offset : nil
245
+ offset < 1024 ? offset : nil
240
246
  rescue EOFError
241
- return nil
247
+ nil
242
248
  end
243
249
  end
244
250
  ################################################################################
data/lib/pdf/reader.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -110,16 +111,10 @@ module PDF
110
111
  #
111
112
  # reader = PDF::Reader.new("somefile.pdf", :password => "apples")
112
113
  #
113
- def initialize(input = nil, opts = {})
114
- if input # support the deprecated Reader API
115
- @cache = PDF::Reader::ObjectCache.new
116
- opts.merge!(:cache => @cache)
117
- @objects = PDF::Reader::ObjectHash.new(input, opts)
118
- else
119
- msg = "Calling PDF::Reader#new with no arguments is deprecated and will be removed "
120
- msg += "in the 2.0 release"
121
- $stderr.puts(msg)
122
- end
114
+ def initialize(input, opts = {})
115
+ @cache = PDF::Reader::ObjectCache.new
116
+ opts.merge!(:cache => @cache)
117
+ @objects = PDF::Reader::ObjectHash.new(input, opts)
123
118
  end
124
119
 
125
120
  def info
@@ -133,13 +128,16 @@ module PDF
133
128
  nil
134
129
  else
135
130
  xml = stream.unfiltered_data
136
- xml.force_encoding("utf-8") if xml.respond_to?(:force_encoding)
131
+ xml.force_encoding("utf-8")
137
132
  xml
138
133
  end
139
134
  end
140
135
 
141
136
  def page_count
142
137
  pages = @objects.deref(root[:Pages])
138
+ unless pages.kind_of?(::Hash)
139
+ raise MalformedPDFError, 'Pages structure is missing'
140
+ end
143
141
  @page_count ||= @objects.deref(pages[:Count])
144
142
  end
145
143
 
@@ -164,61 +162,6 @@ module PDF
164
162
  yield PDF::Reader.new(input, opts)
165
163
  end
166
164
 
167
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
168
- # eventually be removed
169
- #
170
- #
171
- # Parse the file with the given name, sending events to the given receiver.
172
- #
173
- def self.file(name, receivers, opts = {})
174
- msg = "PDF::Reader#file is deprecated and will be removed in the 2.0 release"
175
- $stderr.puts(msg)
176
- File.open(name,"rb") do |f|
177
- new.parse(f, receivers, opts)
178
- end
179
- end
180
-
181
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
182
- # eventually be removed
183
- #
184
- # Parse the given string, sending events to the given receiver.
185
- #
186
- def self.string(str, receivers, opts = {})
187
- msg = "PDF::Reader#string is deprecated and will be removed in the 2.0 release"
188
- $stderr.puts(msg)
189
- StringIO.open(str) do |s|
190
- new.parse(s, receivers, opts)
191
- end
192
- end
193
-
194
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
195
- # eventually be removed
196
- #
197
- # Parse the file with the given name, returning an unmarshalled ruby version of
198
- # represents the requested pdf object
199
- #
200
- def self.object_file(name, id, gen = 0)
201
- msg = "PDF::Reader#object_file is deprecated and will be removed in the 2.0 release"
202
- $stderr.puts(msg)
203
- File.open(name,"rb") { |f|
204
- new.object(f, id.to_i, gen.to_i)
205
- }
206
- end
207
-
208
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
209
- # eventually be removed
210
- #
211
- # Parse the given string, returning an unmarshalled ruby version of represents
212
- # the requested pdf object
213
- #
214
- def self.object_string(str, id, gen = 0)
215
- msg = "PDF::Reader#object_string is deprecated and will be removed in the 2.0 release"
216
- $stderr.puts(msg)
217
- StringIO.open(str) { |s|
218
- new.object(s, id.to_i, gen.to_i)
219
- }
220
- end
221
-
222
165
  # returns an array of PDF::Reader::Page objects, one for each
223
166
  # page in the source PDF.
224
167
  #
@@ -234,9 +177,13 @@ module PDF
234
177
  # methods available on each page
235
178
  #
236
179
  def pages
237
- (1..self.page_count).map { |num|
238
- PDF::Reader::Page.new(@objects, num, :cache => @cache)
239
- }
180
+ (1..self.page_count).map do |num|
181
+ begin
182
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
183
+ rescue InvalidPageError
184
+ raise MalformedPDFError, "Missing data for page: #{num}"
185
+ end
186
+ end
240
187
  end
241
188
 
242
189
  # returns a single PDF::Reader::Page for the specified page.
@@ -254,45 +201,11 @@ module PDF
254
201
  def page(num)
255
202
  num = num.to_i
256
203
  if num < 1 || num > self.page_count
257
- raise ArgumentError, "valid pages are 1 .. #{self.page_count}"
204
+ raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}"
258
205
  end
259
206
  PDF::Reader::Page.new(@objects, num, :cache => @cache)
260
207
  end
261
208
 
262
-
263
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
264
- # eventually be removed
265
- #
266
- # Given an IO object that contains PDF data, parse it.
267
- #
268
- def parse(io, receivers, opts = {})
269
- msg = "PDF::Reader#parse is deprecated and will be removed in the 2.0 release"
270
- $stderr.puts(msg)
271
- ohash = ObjectHash.new(io)
272
-
273
- options = {:pages => true, :raw_text => false, :metadata => true}
274
- options.merge!(opts)
275
-
276
- strategies.each do |s|
277
- s.new(ohash, receivers, options).process
278
- end
279
-
280
- self
281
- end
282
-
283
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
284
- # eventually be removed
285
- #
286
- # Given an IO object that contains PDF data, return the contents of a single object
287
- #
288
- def object(io, id, gen)
289
- msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
290
- $stderr.puts(msg)
291
- @objects = ObjectHash.new(io)
292
-
293
- @objects.deref(Reference.new(id, gen))
294
- end
295
-
296
209
  private
297
210
 
298
211
  # recursively convert strings from outside a content stream into UTF-8
@@ -314,14 +227,14 @@ module PDF
314
227
  pdfdoc_to_utf8(obj)
315
228
  end
316
229
  else
317
- obj
230
+ @objects.deref(obj)
318
231
  end
319
232
  end
320
233
 
321
234
  # TODO find a PDF I can use to spec this behaviour
322
235
  #
323
236
  def pdfdoc_to_utf8(obj)
324
- obj.force_encoding("utf-8") if obj.respond_to?(:force_encoding)
237
+ obj.force_encoding("utf-8")
325
238
  obj
326
239
  end
327
240
 
@@ -331,19 +244,18 @@ module PDF
331
244
  def utf16_to_utf8(obj)
332
245
  str = obj[2, obj.size]
333
246
  str = str.unpack("n*").pack("U*")
334
- str.force_encoding("utf-8") if str.respond_to?(:force_encoding)
247
+ str.force_encoding("utf-8")
335
248
  str
336
249
  end
337
250
 
338
- def strategies
339
- @strategies ||= [
340
- ::PDF::Reader::MetadataStrategy,
341
- ::PDF::Reader::PagesStrategy
342
- ]
343
- end
344
-
345
251
  def root
346
- @root ||= @objects.deref(@objects.trailer[:Root])
252
+ @root ||= begin
253
+ obj = @objects.deref(@objects.trailer[:Root])
254
+ unless obj.kind_of?(::Hash)
255
+ raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
256
+ end
257
+ obj
258
+ end
347
259
  end
348
260
 
349
261
  end
@@ -351,7 +263,6 @@ end
351
263
  ################################################################################
352
264
 
353
265
  require 'pdf/reader/resource_methods'
354
- require 'pdf/reader/abstract_strategy'
355
266
  require 'pdf/reader/buffer'
356
267
  require 'pdf/reader/cid_widths'
357
268
  require 'pdf/reader/cmap'
@@ -370,7 +281,6 @@ require 'pdf/reader/font_descriptor'
370
281
  require 'pdf/reader/form_xobject'
371
282
  require 'pdf/reader/glyph_hash'
372
283
  require 'pdf/reader/lzw'
373
- require 'pdf/reader/metadata_strategy'
374
284
  require 'pdf/reader/object_cache'
375
285
  require 'pdf/reader/object_hash'
376
286
  require 'pdf/reader/object_stream'
@@ -379,9 +289,11 @@ require 'pdf/reader/parser'
379
289
  require 'pdf/reader/print_receiver'
380
290
  require 'pdf/reader/reference'
381
291
  require 'pdf/reader/register_receiver'
292
+ require 'pdf/reader/null_security_handler'
382
293
  require 'pdf/reader/standard_security_handler'
294
+ require 'pdf/reader/standard_security_handler_v5'
295
+ require 'pdf/reader/unimplemented_security_handler'
383
296
  require 'pdf/reader/stream'
384
- require 'pdf/reader/text_receiver'
385
297
  require 'pdf/reader/text_run'
386
298
  require 'pdf/reader/page_state'
387
299
  require 'pdf/reader/page_text_receiver'
@@ -389,4 +301,3 @@ require 'pdf/reader/token'
389
301
  require 'pdf/reader/xref'
390
302
  require 'pdf/reader/orientation_detector'
391
303
  require 'pdf/reader/page'
392
- require 'pdf/hash'
data/lib/pdf-reader.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require "pdf/reader"