pdf-reader 2.6.0 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +21 -1
  3. data/Rakefile +1 -1
  4. data/examples/rspec.rb +1 -0
  5. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  6. data/lib/pdf/reader/buffer.rb +1 -0
  7. data/lib/pdf/reader/cid_widths.rb +1 -0
  8. data/lib/pdf/reader/cmap.rb +5 -3
  9. data/lib/pdf/reader/encoding.rb +2 -1
  10. data/lib/pdf/reader/error.rb +8 -0
  11. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  12. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  13. data/lib/pdf/reader/filter/depredict.rb +7 -5
  14. data/lib/pdf/reader/filter/flate.rb +2 -0
  15. data/lib/pdf/reader/filter/lzw.rb +2 -0
  16. data/lib/pdf/reader/filter/null.rb +1 -0
  17. data/lib/pdf/reader/filter/run_length.rb +19 -13
  18. data/lib/pdf/reader/filter.rb +1 -0
  19. data/lib/pdf/reader/font.rb +44 -0
  20. data/lib/pdf/reader/font_descriptor.rb +1 -0
  21. data/lib/pdf/reader/form_xobject.rb +1 -0
  22. data/lib/pdf/reader/glyph_hash.rb +1 -0
  23. data/lib/pdf/reader/lzw.rb +4 -2
  24. data/lib/pdf/reader/null_security_handler.rb +1 -0
  25. data/lib/pdf/reader/object_cache.rb +1 -0
  26. data/lib/pdf/reader/object_hash.rb +5 -2
  27. data/lib/pdf/reader/object_stream.rb +1 -0
  28. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  29. data/lib/pdf/reader/page.rb +73 -11
  30. data/lib/pdf/reader/page_layout.rb +28 -32
  31. data/lib/pdf/reader/page_state.rb +11 -10
  32. data/lib/pdf/reader/page_text_receiver.rb +53 -9
  33. data/lib/pdf/reader/pages_strategy.rb +1 -0
  34. data/lib/pdf/reader/parser.rb +7 -1
  35. data/lib/pdf/reader/point.rb +25 -0
  36. data/lib/pdf/reader/print_receiver.rb +1 -0
  37. data/lib/pdf/reader/rectangle.rb +113 -0
  38. data/lib/pdf/reader/reference.rb +1 -0
  39. data/lib/pdf/reader/register_receiver.rb +1 -0
  40. data/lib/pdf/reader/resource_methods.rb +5 -0
  41. data/lib/pdf/reader/standard_security_handler.rb +1 -0
  42. data/lib/pdf/reader/standard_security_handler_v5.rb +1 -0
  43. data/lib/pdf/reader/stream.rb +1 -0
  44. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  45. data/lib/pdf/reader/text_run.rb +14 -6
  46. data/lib/pdf/reader/token.rb +1 -0
  47. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  48. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  49. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  50. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  51. data/lib/pdf/reader/width_calculator/true_type.rb +1 -0
  52. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  53. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  54. data/lib/pdf/reader/width_calculator.rb +1 -0
  55. data/lib/pdf/reader/xref.rb +1 -0
  56. data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
  57. data/lib/pdf/reader.rb +29 -6
  58. data/lib/pdf-reader.rb +1 -0
  59. data/rbi/pdf-reader.rbi +1763 -0
  60. metadata +13 -10
  61. data/lib/pdf/reader/orientation_detector.rb +0 -34
data/lib/pdf/reader.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -111,17 +112,25 @@ module PDF
111
112
  #
112
113
  # reader = PDF::Reader.new("somefile.pdf", :password => "apples")
113
114
  #
115
+ # Using this method directly is supported, but it's more common to use
116
+ # `PDF::Reader.open`
117
+ #
114
118
  def initialize(input, opts = {})
115
119
  @cache = PDF::Reader::ObjectCache.new
116
120
  opts.merge!(:cache => @cache)
117
121
  @objects = PDF::Reader::ObjectHash.new(input, opts)
118
122
  end
119
123
 
124
+ # Return a Hash with some basic information about the PDF file
125
+ #
120
126
  def info
121
127
  dict = @objects.deref(@objects.trailer[:Info])
122
128
  doc_strings_to_utf8(dict)
123
129
  end
124
130
 
131
+ # Return a Hash with extra metadata provided by the author of the PDF file. Not
132
+ # always present.
133
+ #
125
134
  def metadata
126
135
  stream = @objects.deref(root[:Metadata])
127
136
  if stream.nil?
@@ -133,20 +142,24 @@ module PDF
133
142
  end
134
143
  end
135
144
 
145
+ # The number of pages in this PDF
146
+ #
136
147
  def page_count
137
148
  pages = @objects.deref(root[:Pages])
138
149
  unless pages.kind_of?(::Hash)
139
- raise MalformedPDFError, 'Pages structure is missing'
150
+ raise MalformedPDFError, "Pages structure is missing #{pages.class}"
140
151
  end
141
152
  @page_count ||= @objects.deref(pages[:Count])
142
153
  end
143
154
 
155
+ # The PDF version this file uses
156
+ #
144
157
  def pdf_version
145
158
  @objects.pdf_version
146
159
  end
147
160
 
148
- # syntactic sugar for opening a PDF file. Accepts the same arguments
149
- # as new().
161
+ # syntactic sugar for opening a PDF file and the most common approach. Accepts the
162
+ # same arguments as new().
150
163
  #
151
164
  # PDF::Reader.open("somefile.pdf") do |reader|
152
165
  # puts reader.pdf_version
@@ -221,7 +234,7 @@ module PDF
221
234
  when Array then
222
235
  obj.map { |item| doc_strings_to_utf8(item) }
223
236
  when String then
224
- if obj[0,2].unpack("C*") == [254, 255]
237
+ if has_utf16_bom?(obj)
225
238
  utf16_to_utf8(obj)
226
239
  else
227
240
  pdfdoc_to_utf8(obj)
@@ -231,6 +244,14 @@ module PDF
231
244
  end
232
245
  end
233
246
 
247
+ def has_utf16_bom?(str)
248
+ first_bytes = str[0,2]
249
+
250
+ return false if first_bytes.nil?
251
+
252
+ first_bytes.unpack("C*") == [254, 255]
253
+ end
254
+
234
255
  # TODO find a PDF I can use to spec this behaviour
235
256
  #
236
257
  def pdfdoc_to_utf8(obj)
@@ -242,7 +263,7 @@ module PDF
242
263
  # String#encode
243
264
  #
244
265
  def utf16_to_utf8(obj)
245
- str = obj[2, obj.size]
266
+ str = obj[2, obj.size].to_s
246
267
  str = str.unpack("n*").pack("U*")
247
268
  str.force_encoding("utf-8")
248
269
  str
@@ -264,6 +285,7 @@ end
264
285
 
265
286
  require 'pdf/reader/resource_methods'
266
287
  require 'pdf/reader/buffer'
288
+ require 'pdf/reader/bounding_rectangle_runs_filter'
267
289
  require 'pdf/reader/cid_widths'
268
290
  require 'pdf/reader/cmap'
269
291
  require 'pdf/reader/encoding'
@@ -286,7 +308,9 @@ require 'pdf/reader/object_hash'
286
308
  require 'pdf/reader/object_stream'
287
309
  require 'pdf/reader/pages_strategy'
288
310
  require 'pdf/reader/parser'
311
+ require 'pdf/reader/point'
289
312
  require 'pdf/reader/print_receiver'
313
+ require 'pdf/reader/rectangle'
290
314
  require 'pdf/reader/reference'
291
315
  require 'pdf/reader/register_receiver'
292
316
  require 'pdf/reader/null_security_handler'
@@ -299,5 +323,4 @@ require 'pdf/reader/page_state'
299
323
  require 'pdf/reader/page_text_receiver'
300
324
  require 'pdf/reader/token'
301
325
  require 'pdf/reader/xref'
302
- require 'pdf/reader/orientation_detector'
303
326
  require 'pdf/reader/page'
data/lib/pdf-reader.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require "pdf/reader"