pdf-reader 1.4.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG +53 -3
  3. data/{README.rdoc → README.md} +40 -23
  4. data/Rakefile +2 -2
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -1
  8. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  11. data/lib/pdf/reader/afm/Courier.afm +342 -342
  12. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  14. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  15. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  16. data/lib/pdf/reader/afm/MustRead.html +19 -0
  17. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  18. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  19. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  20. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  21. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  22. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  23. data/lib/pdf/reader/buffer.rb +14 -12
  24. data/lib/pdf/reader/cid_widths.rb +2 -0
  25. data/lib/pdf/reader/cmap.rb +48 -36
  26. data/lib/pdf/reader/encoding.rb +16 -18
  27. data/lib/pdf/reader/error.rb +5 -0
  28. data/lib/pdf/reader/filter/ascii85.rb +1 -0
  29. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  30. data/lib/pdf/reader/filter/depredict.rb +1 -0
  31. data/lib/pdf/reader/filter/flate.rb +29 -16
  32. data/lib/pdf/reader/filter/lzw.rb +2 -0
  33. data/lib/pdf/reader/filter/null.rb +2 -0
  34. data/lib/pdf/reader/filter/run_length.rb +4 -6
  35. data/lib/pdf/reader/filter.rb +2 -0
  36. data/lib/pdf/reader/font.rb +12 -13
  37. data/lib/pdf/reader/font_descriptor.rb +1 -0
  38. data/lib/pdf/reader/form_xobject.rb +1 -0
  39. data/lib/pdf/reader/glyph_hash.rb +7 -2
  40. data/lib/pdf/reader/lzw.rb +4 -4
  41. data/lib/pdf/reader/null_security_handler.rb +17 -0
  42. data/lib/pdf/reader/object_cache.rb +1 -0
  43. data/lib/pdf/reader/object_hash.rb +91 -37
  44. data/lib/pdf/reader/object_stream.rb +1 -0
  45. data/lib/pdf/reader/orientation_detector.rb +5 -4
  46. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  47. data/lib/pdf/reader/page.rb +30 -1
  48. data/lib/pdf/reader/page_layout.rb +19 -24
  49. data/lib/pdf/reader/page_state.rb +8 -5
  50. data/lib/pdf/reader/page_text_receiver.rb +23 -1
  51. data/lib/pdf/reader/pages_strategy.rb +2 -304
  52. data/lib/pdf/reader/parser.rb +10 -7
  53. data/lib/pdf/reader/print_receiver.rb +1 -0
  54. data/lib/pdf/reader/reference.rb +1 -0
  55. data/lib/pdf/reader/register_receiver.rb +1 -0
  56. data/lib/pdf/reader/resource_methods.rb +1 -0
  57. data/lib/pdf/reader/standard_security_handler.rb +80 -42
  58. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  59. data/lib/pdf/reader/stream.rb +1 -0
  60. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  61. data/lib/pdf/reader/text_run.rb +28 -9
  62. data/lib/pdf/reader/token.rb +1 -0
  63. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  64. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  65. data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
  66. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  67. data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
  68. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  69. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  70. data/lib/pdf/reader/width_calculator.rb +1 -0
  71. data/lib/pdf/reader/xref.rb +11 -5
  72. data/lib/pdf/reader.rb +30 -119
  73. data/lib/pdf-reader.rb +1 -0
  74. metadata +35 -61
  75. data/bin/pdf_list_callbacks +0 -17
  76. data/lib/pdf/hash.rb +0 -19
  77. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  78. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  79. data/lib/pdf/reader/text_receiver.rb +0 -265
@@ -0,0 +1,91 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'digest'
5
+ require 'openssl'
6
+
7
+ class PDF::Reader
8
+
9
+ # class creates interface to encrypt dictionary for use in Decrypt
10
+ class StandardSecurityHandlerV5
11
+
12
+ attr_reader :key_length, :encrypt_key
13
+
14
+ def initialize(opts = {})
15
+ @key_length = 256
16
+ @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
+ @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
+ @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
+ @UE = opts[:UE] # decryption key, encrypted w/ user password
20
+ @encrypt_key = build_standard_key(opts[:password] || '')
21
+ end
22
+
23
+ # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
+ def self.supports?(encrypt)
25
+ return false if encrypt.nil?
26
+
27
+ filter = encrypt.fetch(:Filter, :Standard)
28
+ version = encrypt.fetch(:V, 0)
29
+ revision = encrypt.fetch(:R, 0)
30
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
+ ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
+ end
34
+
35
+ ##7.6.2 General Encryption Algorithm
36
+ #
37
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
+ #
39
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
40
+ #
41
+ # buf - a string to decrypt
42
+ # ref - a PDF::Reader::Reference for the object to decrypt
43
+ #
44
+ def decrypt( buf, ref )
45
+ cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
+ cipher.decrypt
47
+ cipher.key = @encrypt_key.dup
48
+ cipher.iv = buf[0..15]
49
+ cipher.update(buf[16..-1]) + cipher.final
50
+ end
51
+
52
+ private
53
+ # Algorithm 3.2a - Computing an encryption key
54
+ #
55
+ # Defined in PDF 1.7 Extension Level 3
56
+ #
57
+ # if the string is a valid user/owner password, this will return the decryption key
58
+ #
59
+ def auth_owner_pass(password)
60
+ if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
+ cipher.decrypt
63
+ cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
+ cipher.iv = "\x00" * 16
65
+ cipher.padding = 0
66
+ cipher.update(@OE) + cipher.final
67
+ end
68
+ end
69
+
70
+ def auth_user_pass(password)
71
+ if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
+ cipher.decrypt
74
+ cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
+ cipher.iv = "\x00" * 16
76
+ cipher.padding = 0
77
+ cipher.update(@UE) + cipher.final
78
+ end
79
+ end
80
+
81
+ def build_standard_key(pass)
82
+ pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
+
84
+ encrypt_key = auth_owner_pass(pass)
85
+ encrypt_key ||= auth_user_pass(pass)
86
+
87
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
+ encrypt_key
89
+ end
90
+ end
91
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # A value object that represents one or more consecutive characters on a page.
@@ -37,6 +38,10 @@ class PDF::Reader
37
38
  @endx ||= x + width
38
39
  end
39
40
 
41
+ def endy
42
+ @endy ||= y + font_size
43
+ end
44
+
40
45
  def mean_character_width
41
46
  @width / character_count
42
47
  end
@@ -59,22 +64,36 @@ class PDF::Reader
59
64
  "#{text} w:#{width} f:#{font_size} @#{x},#{y}"
60
65
  end
61
66
 
67
+ def intersect?(other_run)
68
+ x <= other_run.endx && endx >= other_run.x &&
69
+ endy >= other_run.y && y <= other_run.endy
70
+ end
71
+
72
+ # return what percentage of this text run is overlapped by another run
73
+ def intersection_area_percent(other_run)
74
+ return 0 unless intersect?(other_run)
75
+
76
+ dx = [endx, other_run.endx].min - [x, other_run.x].max
77
+ dy = [endy, other_run.endy].min - [y, other_run.y].max
78
+ intersection_area = dx*dy
79
+
80
+ intersection_area.to_f / area
81
+ end
82
+
62
83
  private
63
84
 
85
+ def area
86
+ (endx - x) * (endy - y)
87
+ end
88
+
64
89
  def mergable_range
65
90
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
66
91
  end
67
92
 
93
+ # Assume string encoding is marked correctly and we can trust String#size to return a
94
+ # character count
68
95
  def character_count
69
- if @text.size == 1
70
- 1.0
71
- elsif @text.respond_to?(:bytesize)
72
- # M17N aware VM
73
- # so we can trust String#size to return a character count
74
- @text.size.to_f
75
- else
76
- text.unpack("U*").size.to_f
77
- end
96
+ @text.size.to_f
78
97
  end
79
98
  end
80
99
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # co-ordinate systems in PDF files are specified using a 3x3 matrix that looks
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ class PDF::Reader
5
+
6
+ # Security handler for when we don't support the flavour of encryption
7
+ # used in a PDF.
8
+ class UnimplementedSecurityHandler
9
+ def self.supports?(encrypt)
10
+ true
11
+ end
12
+
13
+ def decrypt(buf, ref)
14
+ raise PDF::Reader::EncryptedPDFError, "Unsupported encryption style"
15
+ end
16
+ end
17
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'afm'
4
5
  require 'pdf/reader/synchronized_cache'
@@ -11,11 +12,20 @@ class PDF::Reader
11
12
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
12
13
  class BuiltIn
13
14
 
15
+ BUILTINS = [
16
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
17
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
18
+ :Symbol,
19
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
20
+ :ZapfDingbats
21
+ ]
22
+
14
23
  def initialize(font)
15
24
  @font = font
16
25
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
17
26
 
18
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
27
+ basefont = extract_basefont(font.basefont)
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
19
29
 
20
30
  if File.file?(metrics_path)
21
31
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -27,23 +37,15 @@ class PDF::Reader
27
37
  def glyph_width(code_point)
28
38
  return 0 if code_point.nil? || code_point < 0
29
39
 
30
- m = @metrics.char_metrics_by_code[code_point]
31
- if m.nil?
32
- names = @font.encoding.int_to_name(code_point)
33
-
34
- m = names.map { |name|
35
- @metrics.char_metrics[name.to_s]
36
- }.compact.first
37
- end
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
38
44
 
39
- if m
40
- m[:wx]
41
- elsif @font.widths[code_point - 1]
42
- @font.widths[code_point - 1]
43
- elsif control_character?(code_point)
44
- 0
45
+ if metrics
46
+ metrics[:wx]
45
47
  else
46
- 0
48
+ @font.widths[code_point - 1] || 0
47
49
  end
48
50
  end
49
51
 
@@ -53,6 +55,13 @@ class PDF::Reader
53
55
  @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
54
56
  end
55
57
 
58
+ def extract_basefont(font_name)
59
+ if BUILTINS.include?(font_name)
60
+ font_name
61
+ else
62
+ "Times-Roman"
63
+ end
64
+ end
56
65
  end
57
66
  end
58
67
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -17,8 +18,7 @@ class PDF::Reader
17
18
 
18
19
  def glyph_width(code_point)
19
20
  return 0 if code_point.nil? || code_point < 0
20
-
21
- glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point)
21
+ glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point) || 0
22
22
  end
23
23
 
24
24
  private
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module WidthCalculator
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # PDF files may define fonts in a number of ways. Each approach means we must
4
5
  # calculate glyph widths differently, so this set of classes conform to an
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -203,8 +204,10 @@ class PDF::Reader
203
204
  ("\x00" + bytes).unpack("N")[0]
204
205
  elsif bytes.size == 4
205
206
  bytes.unpack("N")[0]
207
+ elsif bytes.size == 8
208
+ bytes.unpack("Q>")[0]
206
209
  else
207
- raise UnsupportedFeatureError, "Unable to unpack xref stream entries with more than 4 bytes"
210
+ raise UnsupportedFeatureError, "Unable to unpack xref stream entries of #{bytes.size} bytes"
208
211
  end
209
212
  end
210
213
  ################################################################################
@@ -227,18 +230,21 @@ class PDF::Reader
227
230
  # should always be 0, but all sort of crazy junk is prefixed to PDF files
228
231
  # in the real world.
229
232
  #
230
- # Checks up to 50 chars into the file, returns nil if no PDF data detected.
233
+ # Checks up to 1024 chars into the file,
234
+ # returns nil if no PDF data detected.
235
+ # Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
236
+ # header appear somewhere within the first 1024 bytes of the file
231
237
  #
232
238
  def calc_junk_offset(io)
233
239
  io.rewind
234
240
  offset = io.pos
235
- until (c = io.readchar) == '%' || c == 37 || offset > 50
241
+ until (c = io.readchar) == '%' || c == 37 || offset > 1024
236
242
  offset += 1
237
243
  end
238
244
  io.rewind
239
- offset < 50 ? offset : nil
245
+ offset < 1024 ? offset : nil
240
246
  rescue EOFError
241
- return nil
247
+ nil
242
248
  end
243
249
  end
244
250
  ################################################################################
data/lib/pdf/reader.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -110,16 +111,10 @@ module PDF
110
111
  #
111
112
  # reader = PDF::Reader.new("somefile.pdf", :password => "apples")
112
113
  #
113
- def initialize(input = nil, opts = {})
114
- if input # support the deprecated Reader API
115
- @cache = PDF::Reader::ObjectCache.new
116
- opts.merge!(:cache => @cache)
117
- @objects = PDF::Reader::ObjectHash.new(input, opts)
118
- else
119
- msg = "Calling PDF::Reader#new with no arguments is deprecated and will be removed "
120
- msg += "in the 2.0 release"
121
- $stderr.puts(msg)
122
- end
114
+ def initialize(input, opts = {})
115
+ @cache = PDF::Reader::ObjectCache.new
116
+ opts.merge!(:cache => @cache)
117
+ @objects = PDF::Reader::ObjectHash.new(input, opts)
123
118
  end
124
119
 
125
120
  def info
@@ -133,13 +128,16 @@ module PDF
133
128
  nil
134
129
  else
135
130
  xml = stream.unfiltered_data
136
- xml.force_encoding("utf-8") if xml.respond_to?(:force_encoding)
131
+ xml.force_encoding("utf-8")
137
132
  xml
138
133
  end
139
134
  end
140
135
 
141
136
  def page_count
142
137
  pages = @objects.deref(root[:Pages])
138
+ unless pages.kind_of?(::Hash)
139
+ raise MalformedPDFError, 'Pages structure is missing'
140
+ end
143
141
  @page_count ||= @objects.deref(pages[:Count])
144
142
  end
145
143
 
@@ -164,61 +162,6 @@ module PDF
164
162
  yield PDF::Reader.new(input, opts)
165
163
  end
166
164
 
167
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
168
- # eventually be removed
169
- #
170
- #
171
- # Parse the file with the given name, sending events to the given receiver.
172
- #
173
- def self.file(name, receivers, opts = {})
174
- msg = "PDF::Reader#file is deprecated and will be removed in the 2.0 release"
175
- $stderr.puts(msg)
176
- File.open(name,"rb") do |f|
177
- new.parse(f, receivers, opts)
178
- end
179
- end
180
-
181
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
182
- # eventually be removed
183
- #
184
- # Parse the given string, sending events to the given receiver.
185
- #
186
- def self.string(str, receivers, opts = {})
187
- msg = "PDF::Reader#string is deprecated and will be removed in the 2.0 release"
188
- $stderr.puts(msg)
189
- StringIO.open(str) do |s|
190
- new.parse(s, receivers, opts)
191
- end
192
- end
193
-
194
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
195
- # eventually be removed
196
- #
197
- # Parse the file with the given name, returning an unmarshalled ruby version of
198
- # represents the requested pdf object
199
- #
200
- def self.object_file(name, id, gen = 0)
201
- msg = "PDF::Reader#object_file is deprecated and will be removed in the 2.0 release"
202
- $stderr.puts(msg)
203
- File.open(name,"rb") { |f|
204
- new.object(f, id.to_i, gen.to_i)
205
- }
206
- end
207
-
208
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
209
- # eventually be removed
210
- #
211
- # Parse the given string, returning an unmarshalled ruby version of represents
212
- # the requested pdf object
213
- #
214
- def self.object_string(str, id, gen = 0)
215
- msg = "PDF::Reader#object_string is deprecated and will be removed in the 2.0 release"
216
- $stderr.puts(msg)
217
- StringIO.open(str) { |s|
218
- new.object(s, id.to_i, gen.to_i)
219
- }
220
- end
221
-
222
165
  # returns an array of PDF::Reader::Page objects, one for each
223
166
  # page in the source PDF.
224
167
  #
@@ -234,9 +177,13 @@ module PDF
234
177
  # methods available on each page
235
178
  #
236
179
  def pages
237
- (1..self.page_count).map { |num|
238
- PDF::Reader::Page.new(@objects, num, :cache => @cache)
239
- }
180
+ (1..self.page_count).map do |num|
181
+ begin
182
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
183
+ rescue InvalidPageError
184
+ raise MalformedPDFError, "Missing data for page: #{num}"
185
+ end
186
+ end
240
187
  end
241
188
 
242
189
  # returns a single PDF::Reader::Page for the specified page.
@@ -254,45 +201,11 @@ module PDF
254
201
  def page(num)
255
202
  num = num.to_i
256
203
  if num < 1 || num > self.page_count
257
- raise ArgumentError, "valid pages are 1 .. #{self.page_count}"
204
+ raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}"
258
205
  end
259
206
  PDF::Reader::Page.new(@objects, num, :cache => @cache)
260
207
  end
261
208
 
262
-
263
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
264
- # eventually be removed
265
- #
266
- # Given an IO object that contains PDF data, parse it.
267
- #
268
- def parse(io, receivers, opts = {})
269
- msg = "PDF::Reader#parse is deprecated and will be removed in the 2.0 release"
270
- $stderr.puts(msg)
271
- ohash = ObjectHash.new(io)
272
-
273
- options = {:pages => true, :raw_text => false, :metadata => true}
274
- options.merge!(opts)
275
-
276
- strategies.each do |s|
277
- s.new(ohash, receivers, options).process
278
- end
279
-
280
- self
281
- end
282
-
283
- # DEPRECATED: this method was deprecated in version 1.0.0 and will
284
- # eventually be removed
285
- #
286
- # Given an IO object that contains PDF data, return the contents of a single object
287
- #
288
- def object(io, id, gen)
289
- msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
290
- $stderr.puts(msg)
291
- @objects = ObjectHash.new(io)
292
-
293
- @objects.deref(Reference.new(id, gen))
294
- end
295
-
296
209
  private
297
210
 
298
211
  # recursively convert strings from outside a content stream into UTF-8
@@ -314,14 +227,14 @@ module PDF
314
227
  pdfdoc_to_utf8(obj)
315
228
  end
316
229
  else
317
- obj
230
+ @objects.deref(obj)
318
231
  end
319
232
  end
320
233
 
321
234
  # TODO find a PDF I can use to spec this behaviour
322
235
  #
323
236
  def pdfdoc_to_utf8(obj)
324
- obj.force_encoding("utf-8") if obj.respond_to?(:force_encoding)
237
+ obj.force_encoding("utf-8")
325
238
  obj
326
239
  end
327
240
 
@@ -331,19 +244,18 @@ module PDF
331
244
  def utf16_to_utf8(obj)
332
245
  str = obj[2, obj.size]
333
246
  str = str.unpack("n*").pack("U*")
334
- str.force_encoding("utf-8") if str.respond_to?(:force_encoding)
247
+ str.force_encoding("utf-8")
335
248
  str
336
249
  end
337
250
 
338
- def strategies
339
- @strategies ||= [
340
- ::PDF::Reader::MetadataStrategy,
341
- ::PDF::Reader::PagesStrategy
342
- ]
343
- end
344
-
345
251
  def root
346
- @root ||= @objects.deref(@objects.trailer[:Root])
252
+ @root ||= begin
253
+ obj = @objects.deref(@objects.trailer[:Root])
254
+ unless obj.kind_of?(::Hash)
255
+ raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
256
+ end
257
+ obj
258
+ end
347
259
  end
348
260
 
349
261
  end
@@ -351,7 +263,6 @@ end
351
263
  ################################################################################
352
264
 
353
265
  require 'pdf/reader/resource_methods'
354
- require 'pdf/reader/abstract_strategy'
355
266
  require 'pdf/reader/buffer'
356
267
  require 'pdf/reader/cid_widths'
357
268
  require 'pdf/reader/cmap'
@@ -370,7 +281,6 @@ require 'pdf/reader/font_descriptor'
370
281
  require 'pdf/reader/form_xobject'
371
282
  require 'pdf/reader/glyph_hash'
372
283
  require 'pdf/reader/lzw'
373
- require 'pdf/reader/metadata_strategy'
374
284
  require 'pdf/reader/object_cache'
375
285
  require 'pdf/reader/object_hash'
376
286
  require 'pdf/reader/object_stream'
@@ -379,9 +289,11 @@ require 'pdf/reader/parser'
379
289
  require 'pdf/reader/print_receiver'
380
290
  require 'pdf/reader/reference'
381
291
  require 'pdf/reader/register_receiver'
292
+ require 'pdf/reader/null_security_handler'
382
293
  require 'pdf/reader/standard_security_handler'
294
+ require 'pdf/reader/standard_security_handler_v5'
295
+ require 'pdf/reader/unimplemented_security_handler'
383
296
  require 'pdf/reader/stream'
384
- require 'pdf/reader/text_receiver'
385
297
  require 'pdf/reader/text_run'
386
298
  require 'pdf/reader/page_state'
387
299
  require 'pdf/reader/page_text_receiver'
@@ -389,4 +301,3 @@ require 'pdf/reader/token'
389
301
  require 'pdf/reader/xref'
390
302
  require 'pdf/reader/orientation_detector'
391
303
  require 'pdf/reader/page'
392
- require 'pdf/hash'
data/lib/pdf-reader.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require "pdf/reader"