pdf-reader 2.14.1 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +15 -0
  3. data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  7. data/lib/pdf/reader/buffer.rb +35 -17
  8. data/lib/pdf/reader/cid_widths.rb +7 -1
  9. data/lib/pdf/reader/cmap.rb +14 -3
  10. data/lib/pdf/reader/encoding.rb +37 -12
  11. data/lib/pdf/reader/error.rb +6 -0
  12. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  13. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  14. data/lib/pdf/reader/filter/depredict.rb +4 -0
  15. data/lib/pdf/reader/filter/flate.rb +5 -2
  16. data/lib/pdf/reader/filter/lzw.rb +2 -0
  17. data/lib/pdf/reader/filter/null.rb +2 -0
  18. data/lib/pdf/reader/filter/run_length.rb +2 -0
  19. data/lib/pdf/reader/filter.rb +1 -0
  20. data/lib/pdf/reader/font.rb +90 -22
  21. data/lib/pdf/reader/font_descriptor.rb +76 -23
  22. data/lib/pdf/reader/form_xobject.rb +11 -0
  23. data/lib/pdf/reader/glyph_hash.rb +34 -9
  24. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  25. data/lib/pdf/reader/lzw.rb +17 -6
  26. data/lib/pdf/reader/no_text_filter.rb +1 -0
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +7 -2
  29. data/lib/pdf/reader/object_hash.rb +116 -9
  30. data/lib/pdf/reader/object_stream.rb +19 -2
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
  32. data/lib/pdf/reader/page.rb +41 -7
  33. data/lib/pdf/reader/page_layout.rb +25 -8
  34. data/lib/pdf/reader/page_state.rb +5 -2
  35. data/lib/pdf/reader/page_text_receiver.rb +6 -2
  36. data/lib/pdf/reader/pages_strategy.rb +1 -1
  37. data/lib/pdf/reader/parser.rb +51 -10
  38. data/lib/pdf/reader/point.rb +9 -2
  39. data/lib/pdf/reader/print_receiver.rb +2 -6
  40. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  41. data/lib/pdf/reader/rectangle.rb +24 -1
  42. data/lib/pdf/reader/reference.rb +10 -1
  43. data/lib/pdf/reader/register_receiver.rb +15 -2
  44. data/lib/pdf/reader/resources.rb +9 -0
  45. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  46. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  47. data/lib/pdf/reader/stream.rb +9 -3
  48. data/lib/pdf/reader/synchronized_cache.rb +5 -2
  49. data/lib/pdf/reader/text_run.rb +28 -1
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +33 -2
  52. data/lib/pdf/reader/type_check.rb +10 -3
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  54. data/lib/pdf/reader/validating_receiver.rb +29 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
  56. data/lib/pdf/reader/width_calculator/composite.rb +5 -1
  57. data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
  60. data/lib/pdf/reader/xref.rb +28 -7
  61. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  62. data/lib/pdf/reader.rb +18 -2
  63. data/rbi/pdf-reader.rbi +1502 -1594
  64. metadata +17 -11
@@ -32,8 +32,24 @@ class PDF::Reader
32
32
  class ObjectHash
33
33
  include Enumerable
34
34
 
35
+ #: type securityHandler = (
36
+ #| PDF::Reader::NullSecurityHandler |
37
+ #| PDF::Reader::AesV2SecurityHandler |
38
+ #| PDF::Reader::Rc4SecurityHandler |
39
+ #| PDF::Reader::AesV3SecurityHandler |
40
+ #| PDF::Reader::UnimplementedSecurityHandler
41
+ #| )
42
+
43
+ #: untyped
35
44
  attr_accessor :default
36
- attr_reader :trailer, :pdf_version
45
+
46
+ #: Hash[Symbol, untyped]
47
+ attr_reader :trailer
48
+
49
+ #: Float
50
+ attr_reader :pdf_version
51
+
52
+ #: securityHandler
37
53
  attr_reader :sec_handler
38
54
 
39
55
  # Creates a new ObjectHash object. Input can be a string with a valid filename
@@ -43,21 +59,25 @@ class PDF::Reader
43
59
  #
44
60
  # :password - the user password to decrypt the source PDF
45
61
  #
62
+ #: ((IO | Tempfile | StringIO | String), ?Hash[Symbol, untyped]) -> void
46
63
  def initialize(input, opts = {})
47
- @io = extract_io_from(input)
48
- @xref = PDF::Reader::XRef.new(@io)
49
- @pdf_version = read_version
50
- @trailer = @xref.trailer
51
- @cache = opts[:cache] || PDF::Reader::ObjectCache.new
52
- @sec_handler = NullSecurityHandler.new
64
+ @io = extract_io_from(input) #: IO | Tempfile | StringIO
65
+ @xref = PDF::Reader::XRef.new(@io) #: PDF::Reader::XRef[PDF::Reader::Reference]
66
+ @pdf_version = read_version #: Float
67
+ @trailer = @xref.trailer #: Hash[Symbol, untyped]
68
+ @cache = opts[:cache] || PDF::Reader::ObjectCache.new #: PDF::Reader::ObjectCache
69
+ @sec_handler = NullSecurityHandler.new #: securityHandler
53
70
  @sec_handler = SecurityHandlerFactory.build(
54
71
  deref(trailer[:Encrypt]),
55
72
  deref(trailer[:ID]),
56
73
  opts[:password]
57
74
  )
75
+ @page_references = nil #: Array[PDF::Reader::Reference | Hash[Symbol, untyped]]?
76
+ @object_streams = nil #: Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]?
58
77
  end
59
78
 
60
79
  # returns the type of object a ref points to
80
+ #: ((Integer | PDF::Reader::Reference)) -> Symbol?
61
81
  def obj_type(ref)
62
82
  self[ref].class.to_s.to_sym
63
83
  rescue
@@ -65,6 +85,7 @@ class PDF::Reader
65
85
  end
66
86
 
67
87
  # returns true if the supplied reference points to an object with a stream
88
+ #: ((Integer | PDF::Reader::Reference)) -> bool
68
89
  def stream?(ref)
69
90
  self.has_key?(ref) && self[ref].is_a?(PDF::Reader::Stream)
70
91
  end
@@ -78,6 +99,7 @@ class PDF::Reader
78
99
  # If a PDF::Reader::Reference object is used the exact ID and generation number
79
100
  # can be specified.
80
101
  #
102
+ #: ((Integer | PDF::Reader::Reference)) -> untyped
81
103
  def [](key)
82
104
  return default if key.to_i <= 0
83
105
 
@@ -93,6 +115,7 @@ class PDF::Reader
93
115
  # If key is a PDF::Reader::Reference object, lookup the corresponding
94
116
  # object in the PDF and return it. Otherwise return key untouched.
95
117
  #
118
+ #: (untyped) -> untyped
96
119
  def object(key)
97
120
  key.is_a?(PDF::Reader::Reference) ? self[key] : key
98
121
  end
@@ -104,6 +127,7 @@ class PDF::Reader
104
127
  # Guaranteed to only return an Array or nil. If the dereference results in
105
128
  # any other type then a MalformedPDFError exception will raise. Useful when
106
129
  # expecting an Array and no other type will do.
130
+ #: (untyped) -> Array[untyped]?
107
131
  def deref_array(key)
108
132
  obj = deref(key)
109
133
 
@@ -122,6 +146,7 @@ class PDF::Reader
122
146
  # expecting an Array and no other type will do.
123
147
  #
124
148
  # Some effort to cast array elements to a number is made for any non-numeric elements.
149
+ #: (untyped) -> Array[Numeric]?
125
150
  def deref_array_of_numbers(key)
126
151
  arr = deref(key)
127
152
 
@@ -148,6 +173,7 @@ class PDF::Reader
148
173
  # Guaranteed to only return a Hash or nil. If the dereference results in
149
174
  # any other type then a MalformedPDFError exception will raise. Useful when
150
175
  # expecting a Hash and no other type will do.
176
+ #: (untyped) -> Hash[Symbol, untyped]?
151
177
  def deref_hash(key)
152
178
  obj = deref(key)
153
179
 
@@ -166,6 +192,7 @@ class PDF::Reader
166
192
  # expecting an Array and no other type will do.
167
193
  #
168
194
  # Some effort to cast to a symbol is made when the reference points to a non-symbol.
195
+ #: (untyped) -> Symbol?
169
196
  def deref_name(key)
170
197
  obj = deref(key)
171
198
 
@@ -190,6 +217,7 @@ class PDF::Reader
190
217
  # expecting an Integer and no other type will do.
191
218
  #
192
219
  # Some effort to cast to an int is made when the reference points to a non-integer.
220
+ #: (untyped) -> Integer?
193
221
  def deref_integer(key)
194
222
  obj = deref(key)
195
223
 
@@ -214,6 +242,7 @@ class PDF::Reader
214
242
  # expecting a Number and no other type will do.
215
243
  #
216
244
  # Some effort to cast to a number is made when the reference points to a non-number.
245
+ #: (untyped) -> Numeric?
217
246
  def deref_number(key)
218
247
  obj = deref(key)
219
248
 
@@ -238,6 +267,7 @@ class PDF::Reader
238
267
  # Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
239
268
  # any other type then a MalformedPDFError exception will raise. Useful when
240
269
  # expecting a stream and no other type will do.
270
+ #: (untyped) -> PDF::Reader::Stream?
241
271
  def deref_stream(key)
242
272
  obj = deref(key)
243
273
 
@@ -258,6 +288,7 @@ class PDF::Reader
258
288
  # expecting a string and no other type will do.
259
289
  #
260
290
  # Some effort to cast to a string is made when the reference points to a non-string.
291
+ #: (untyped) -> String?
261
292
  def deref_string(key)
262
293
  obj = deref(key)
263
294
 
@@ -280,6 +311,7 @@ class PDF::Reader
280
311
  # Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
281
312
  # any other type then a MalformedPDFError exception will raise. Useful when
282
313
  # expecting a Name or Array and no other type will do.
314
+ #: (untyped) -> (Symbol | Array[untyped] | nil)
283
315
  def deref_name_or_array(key)
284
316
  obj = deref(key)
285
317
 
@@ -298,6 +330,7 @@ class PDF::Reader
298
330
  # Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
299
331
  # any other type then a MalformedPDFError exception will raise. Useful when
300
332
  # expecting a stream or Array and no other type will do.
333
+ #: (untyped) -> (PDF::Reader::Stream | Array[untyped] | nil)
301
334
  def deref_stream_or_array(key)
302
335
  obj = deref(key)
303
336
 
@@ -313,10 +346,12 @@ class PDF::Reader
313
346
  # Recursively dereferences the object referred to by +key+. If +key+ is not
314
347
  # a PDF::Reader::Reference, the key is returned unchanged.
315
348
  #
349
+ #: (untyped) -> untyped
316
350
  def deref!(key)
317
351
  deref_internal!(key, {})
318
352
  end
319
353
 
354
+ #: (untyped) -> Array[untyped]?
320
355
  def deref_array!(key)
321
356
  deref!(key).tap { |obj|
322
357
  if !obj.nil? && !obj.is_a?(Array)
@@ -325,6 +360,7 @@ class PDF::Reader
325
360
  }
326
361
  end
327
362
 
363
+ #: (untyped) -> Hash[Symbol, untyped]?
328
364
  def deref_hash!(key)
329
365
  deref!(key).tap { |obj|
330
366
  if !obj.nil? && !obj.is_a?(Hash)
@@ -345,6 +381,7 @@ class PDF::Reader
345
381
  # local_default is the object that will be returned if the requested key doesn't
346
382
  # exist.
347
383
  #
384
+ #: (untyped, ?untyped) -> untyped
348
385
  def fetch(key, local_default = nil)
349
386
  obj = self[key]
350
387
  if obj
@@ -358,6 +395,8 @@ class PDF::Reader
358
395
 
359
396
  # iterate over each key, value. Just like a ruby hash.
360
397
  #
398
+ # @override(allow_incompatible: true)
399
+ #: () { (PDF::Reader::Reference, untyped) -> untyped } -> untyped
361
400
  def each(&block)
362
401
  @xref.each do |ref|
363
402
  yield ref, self[ref]
@@ -367,6 +406,7 @@ class PDF::Reader
367
406
 
368
407
  # iterate over each key. Just like a ruby hash.
369
408
  #
409
+ #: { (PDF::Reader::Reference) -> untyped } -> untyped
370
410
  def each_key(&block)
371
411
  each do |id, obj|
372
412
  yield id
@@ -375,6 +415,7 @@ class PDF::Reader
375
415
 
376
416
  # iterate over each value. Just like a ruby hash.
377
417
  #
418
+ #: { (untyped) -> untyped } -> untyped
378
419
  def each_value(&block)
379
420
  each do |id, obj|
380
421
  yield obj
@@ -383,6 +424,7 @@ class PDF::Reader
383
424
 
384
425
  # return the number of objects in the file. An object with multiple generations
385
426
  # is counted once.
427
+ #: () -> Integer
386
428
  def size
387
429
  xref.size
388
430
  end
@@ -390,6 +432,7 @@ class PDF::Reader
390
432
 
391
433
  # return true if there are no objects in this file
392
434
  #
435
+ #: () -> bool
393
436
  def empty?
394
437
  size == 0 ? true : false
395
438
  end
@@ -397,6 +440,7 @@ class PDF::Reader
397
440
  # return true if the specified key exists in the file. key
398
441
  # can be an int or a PDF::Reader::Reference
399
442
  #
443
+ #: (untyped) -> bool
400
444
  def has_key?(check_key)
401
445
  # TODO update from O(n) to O(1)
402
446
  each_key do |key|
@@ -414,6 +458,7 @@ class PDF::Reader
414
458
 
415
459
  # return true if the specified value exists in the file
416
460
  #
461
+ #: (untyped) -> bool
417
462
  def has_value?(value)
418
463
  # TODO update from O(n) to O(1)
419
464
  each_value do |obj|
@@ -423,12 +468,14 @@ class PDF::Reader
423
468
  end
424
469
  alias :value? :has_key?
425
470
 
471
+ #: () -> String
426
472
  def to_s
427
473
  "<PDF::Reader::ObjectHash size: #{self.size}>"
428
474
  end
429
475
 
430
476
  # return an array of all keys in the file
431
477
  #
478
+ #: () -> Array[PDF::Reader::Reference]
432
479
  def keys
433
480
  ret = []
434
481
  each_key { |k| ret << k }
@@ -437,6 +484,7 @@ class PDF::Reader
437
484
 
438
485
  # return an array of all values in the file
439
486
  #
487
+ #: () -> untyped
440
488
  def values
441
489
  ret = []
442
490
  each_value { |v| ret << v }
@@ -445,12 +493,14 @@ class PDF::Reader
445
493
 
446
494
  # return an array of all values from the specified keys
447
495
  #
496
+ #: (*untyped) -> untyped
448
497
  def values_at(*ids)
449
498
  ids.map { |id| self[id] }
450
499
  end
451
500
 
452
501
  # return an array of arrays. Each sub array contains a key/value pair.
453
502
  #
503
+ #: () -> untyped
454
504
  def to_a
455
505
  ret = []
456
506
  each do |id, obj|
@@ -465,6 +515,7 @@ class PDF::Reader
465
515
  #
466
516
  # Useful for apps that want to extract data from specific pages.
467
517
  #
518
+ #: () -> Array[PDF::Reader::Reference | Hash[Symbol, untyped]]
468
519
  def page_references
469
520
  root = fetch(trailer[:Root])
470
521
  @page_references ||= begin
@@ -473,10 +524,12 @@ class PDF::Reader
473
524
  end
474
525
  end
475
526
 
527
+ #: () -> bool
476
528
  def encrypted?
477
529
  trailer.has_key?(:Encrypt)
478
530
  end
479
531
 
532
+ #: () -> bool
480
533
  def sec_handler?
481
534
  !!sec_handler
482
535
  end
@@ -486,6 +539,17 @@ class PDF::Reader
486
539
  # parse a traditional object from the PDF, starting from the byte offset indicated
487
540
  # in the xref table
488
541
  #
542
+ #: (PDF::Reader::Reference) -> (
543
+ #| PDF::Reader::Reference |
544
+ #| PDF::Reader::Token |
545
+ #| PDF::Reader::Stream |
546
+ #| Numeric |
547
+ #| String |
548
+ #| Symbol |
549
+ #| Array[untyped] |
550
+ #| Hash[untyped, untyped] |
551
+ #| nil
552
+ #| )
489
553
  def fetch_object(key)
490
554
  if xref[key].is_a?(Integer)
491
555
  buf = new_buffer(xref[key])
@@ -495,13 +559,25 @@ class PDF::Reader
495
559
 
496
560
  # parse an object that's embedded in an object stream in the PDF
497
561
  #
562
+ #: (PDF::Reader::Reference) -> (
563
+ #| PDF::Reader::Reference |
564
+ #| PDF::Reader::Token |
565
+ #| PDF::Reader::Stream |
566
+ #| Numeric |
567
+ #| String |
568
+ #| Symbol |
569
+ #| Array[untyped] |
570
+ #| Hash[untyped, untyped] |
571
+ #| nil
572
+ #| )
498
573
  def fetch_object_stream(key)
499
574
  if xref[key].is_a?(PDF::Reader::Reference)
500
575
  container_key = xref[key]
501
576
  stream = deref_stream(container_key)
502
577
  raise MalformedPDFError, "Object Stream cannot be nil" if stream.nil?
503
- object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
504
- object_streams[container_key][key.id]
578
+ if objstream = object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
579
+ objstream[key.id]
580
+ end
505
581
  end
506
582
  end
507
583
 
@@ -509,6 +585,17 @@ class PDF::Reader
509
585
  # isn't publicly available. It's used to avoid endless loops in the recursion, and
510
586
  # doesn't need to be part of the public API.
511
587
  #
588
+ #: (untyped, Hash[Integer, untyped]) -> (
589
+ #| PDF::Reader::Reference |
590
+ #| PDF::Reader::Token |
591
+ #| PDF::Reader::Stream |
592
+ #| Numeric |
593
+ #| String |
594
+ #| Symbol |
595
+ #| Array[untyped] |
596
+ #| Hash[untyped, untyped] |
597
+ #| nil
598
+ #| )
512
599
  def deref_internal!(key, seen)
513
600
  seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
514
601
 
@@ -538,6 +625,17 @@ class PDF::Reader
538
625
  end
539
626
  end
540
627
 
628
+ #: (PDF::Reader::Reference, untyped) -> (
629
+ #| PDF::Reader::Reference |
630
+ #| PDF::Reader::Token |
631
+ #| PDF::Reader::Stream |
632
+ #| Numeric |
633
+ #| String |
634
+ #| Symbol |
635
+ #| Array[untyped] |
636
+ #| Hash[untyped, untyped] |
637
+ #| nil
638
+ #| )
541
639
  def decrypt(ref, obj)
542
640
  case obj
543
641
  when PDF::Reader::Stream then
@@ -559,14 +657,17 @@ class PDF::Reader
559
657
  end
560
658
  end
561
659
 
660
+ #: (?Integer) -> PDF::Reader::Buffer
562
661
  def new_buffer(offset = 0)
563
662
  PDF::Reader::Buffer.new(@io, :seek => offset)
564
663
  end
565
664
 
665
+ #: () -> PDF::Reader::XRef[PDF::Reader::Reference]
566
666
  def xref
567
667
  @xref
568
668
  end
569
669
 
670
+ #: () -> Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]
570
671
  def object_streams
571
672
  @object_streams ||= {}
572
673
  end
@@ -574,6 +675,9 @@ class PDF::Reader
574
675
  # returns an array of object references for all pages in this object store. The ordering of
575
676
  # the Array is significant and matches the page ordering of the document
576
677
  #
678
+ #: (PDF::Reader::Reference | Hash[Symbol, untyped]) -> (
679
+ #| Array[PDF::Reader::Reference | Hash[Symbol, untyped] ]
680
+ #| )
577
681
  def get_page_objects(obj)
578
682
  derefed_obj = deref_hash(obj)
579
683
 
@@ -591,6 +695,7 @@ class PDF::Reader
591
695
  end
592
696
  end
593
697
 
698
+ #: () -> Float
594
699
  def read_version
595
700
  @io.seek(0)
596
701
  _m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
@@ -598,6 +703,7 @@ class PDF::Reader
598
703
  version.to_f
599
704
  end
600
705
 
706
+ #: (IO | Tempfile | StringIO | String) -> (IO | Tempfile | StringIO)
601
707
  def extract_io_from(input)
602
708
  if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
603
709
  input
@@ -608,6 +714,7 @@ class PDF::Reader
608
714
  end
609
715
  end
610
716
 
717
+ #: (String) -> (String)
611
718
  def read_as_binary(input)
612
719
  if File.respond_to?(:binread)
613
720
  File.binread(input.to_s)
@@ -8,11 +8,24 @@ class PDF::Reader
8
8
  # This is done for added compression and is described as an "Object Stream" in the spec.
9
9
  #
10
10
  class ObjectStream # :nodoc:
11
+ #: (PDF::Reader::Stream) -> void
11
12
  def initialize(stream)
12
- @dict = stream.hash
13
- @data = stream.unfiltered_data
13
+ @dict = stream.hash #: Hash[Symbol, untyped]
14
+ @data = stream.unfiltered_data #: String
15
+ @offsets = nil #: Hash[Integer, Integer] | nil
16
+ @buffer = nil #: PDF::Reader::Buffer | nil
14
17
  end
15
18
 
19
+ #: (Integer) -> (
20
+ #| PDF::Reader::Reference |
21
+ #| PDF::Reader::Token |
22
+ #| Numeric |
23
+ #| String |
24
+ #| Symbol |
25
+ #| Array[untyped] |
26
+ #| Hash[untyped, untyped] |
27
+ #| nil
28
+ #| )
16
29
  def [](objid)
17
30
  if offsets[objid].nil?
18
31
  nil
@@ -23,12 +36,14 @@ class PDF::Reader
23
36
  end
24
37
  end
25
38
 
39
+ #: () -> Integer
26
40
  def size
27
41
  TypeCheck.cast_to_int!(@dict[:N])
28
42
  end
29
43
 
30
44
  private
31
45
 
46
+ #: () -> Hash[Integer, Integer]
32
47
  def offsets
33
48
  @offsets ||= {}
34
49
  return @offsets if @offsets.keys.size > 0
@@ -39,10 +54,12 @@ class PDF::Reader
39
54
  @offsets
40
55
  end
41
56
 
57
+ #: () -> Integer
42
58
  def first
43
59
  TypeCheck.cast_to_int!(@dict[:First])
44
60
  end
45
61
 
62
+ #: () -> PDF::Reader::Buffer
46
63
  def buffer
47
64
  @buffer ||= PDF::Reader::Buffer.new(StringIO.new(@data))
48
65
  end
@@ -9,8 +9,9 @@ class PDF::Reader
9
9
 
10
10
  # This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
11
11
  # have identical characters) then one will be discarded
12
- OVERLAPPING_THRESHOLD = 0.5
12
+ OVERLAPPING_THRESHOLD = 0.5 #: Float
13
13
 
14
+ #: (Array[PDF::Reader::TextRun]) -> Array[PDF::Reader::TextRun]
14
15
  def self.exclude_redundant_runs(runs)
15
16
  sweep_line_status = Array.new
16
17
  event_point_schedule = Array.new
@@ -38,6 +39,7 @@ class PDF::Reader
38
39
  runs - to_exclude
39
40
  end
40
41
 
42
+ #: (Array[PDF::Reader::TextRun], PDF::Reader::EventPoint) -> bool
41
43
  def self.detect_intersection(sweep_line_status, event_point)
42
44
  sweep_line_status.each do |open_text_run|
43
45
  if open_text_run.text == event_point.run.text &&
@@ -55,15 +57,19 @@ class PDF::Reader
55
57
  # looking for duplicates
56
58
  class EventPoint
57
59
 
60
+ #: Numeric
58
61
  attr_reader :x
59
62
 
63
+ #: PDF::Reader::TextRun
60
64
  attr_reader :run
61
65
 
66
+ #: (Numeric, PDF::Reader::TextRun) -> void
62
67
  def initialize(x, run)
63
68
  @x = x
64
69
  @run = run
65
70
  end
66
71
 
72
+ #: () -> bool
67
73
  def start?
68
74
  @x == @run.x
69
75
  end