pdf-reader 2.10.0 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55bfae4c5211a0f3ac70845500183e237d3d5f9cc81d548a27f4b8c5fd5acfc9
4
- data.tar.gz: 9e9000474695100c4874afd9abf5f6290ea0b59ffa773c3d03a4a3fc3d2a0a4c
3
+ metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
4
+ data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
5
5
  SHA512:
6
- metadata.gz: 2dc96f064c3b233bd499a5a8140bb6f61fd1bdadb2582ac9bc569adff911e63aaadc16188d6624e0ed766481d453912bf5e0057add84262d753681cc40f51776
7
- data.tar.gz: 64dfac4dd2b73a5302be95c47e74e7ca8b94fc6daeaea30c7a33cca6b4be79b68facc571d209f44a59de2632f6241166a290fe0f2835498b2b7d611906b05a31
6
+ metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
7
+ data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ v2.11.0 (26th October 2022)
2
+ - Various bug fixes
3
+ - Expanded sorbet type annotations
4
+
1
5
  v2.10.0 (12th May 2022)
2
6
  - Various bug fixes
3
7
  - Expanded sorbet type annotations
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
14
14
  Cane::RakeTask.new(:quality) do |cane|
15
15
  cane.abc_max = 20
16
16
  cane.style_measure = 100
17
- cane.max_violations = 28
17
+ cane.max_violations = 33
18
18
 
19
19
  cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
20
20
  end
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -52,7 +52,9 @@ class PDF::Reader
52
52
 
53
53
  # this is the form 10 20 123 where all index between 10 and 20 have width 123
54
54
  def parse_second_form(first, final, width)
55
- raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}" unless first < final
55
+ if first > final
56
+ raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
57
+ end
56
58
 
57
59
  (first..final).inject({}) { |accum, index|
58
60
  accum[index] = width
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -76,9 +76,9 @@ class PDF::Reader
76
76
  diff.each do |val|
77
77
  if val.kind_of?(Numeric)
78
78
  byte = val.to_i
79
- else
79
+ elsif codepoint = glyphlist.name_to_unicode(val)
80
80
  @differences[byte] = val
81
- @mapping[byte] = glyphlist.name_to_unicode(val)
81
+ @mapping[byte] = codepoint
82
82
  byte += 1
83
83
  end
84
84
  end
@@ -167,7 +167,7 @@ class PDF::Reader
167
167
  end
168
168
 
169
169
  def convert_to_utf8(str)
170
- ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
170
+ ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
171
171
  ret.force_encoding("UTF-8")
172
172
  ret
173
173
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -33,10 +33,18 @@ class PDF::Reader
33
33
  #
34
34
  class GlyphHash # :nodoc:
35
35
  def initialize
36
+ @@by_codepoint_cache ||= nil
37
+ @@by_name_cache ||= nil
38
+
36
39
  # only parse the glyph list once, and cache the results (for performance)
37
- adobe = @@cache ||= load_adobe_glyph_mapping
38
- @by_name = adobe.first
39
- @by_codepoint = adobe.last
40
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
41
+ @by_name = @@by_name_cache
42
+ @by_codepoint = @@by_codepoint_cache
43
+ else
44
+ by_name, by_codepoint = load_adobe_glyph_mapping
45
+ @by_name = @@by_name_cache ||= by_name
46
+ @by_codepoint = @@by_codepoint_cache ||= by_codepoint
47
+ end
40
48
  end
41
49
 
42
50
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -127,7 +135,7 @@ class PDF::Reader
127
135
  end
128
136
  end
129
137
 
130
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
138
+ return keyed_by_name.freeze, keyed_by_codepoint.freeze
131
139
  end
132
140
 
133
141
  end
@@ -0,0 +1,14 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # There's no point rendering zero-width characters
7
+ class NoTextFilter
8
+
9
+ def self.exclude_empty_strings(runs)
10
+ runs.reject { |run| run.text.to_s.size == 0 }
11
+ end
12
+ end
13
+ end
14
+
@@ -2,6 +2,8 @@
2
2
  # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
+ require 'tempfile'
6
+
5
7
  class PDF::Reader
6
8
  # Provides low level access to the objects in a PDF file via a hash-like
7
9
  # object.
@@ -566,7 +568,7 @@ class PDF::Reader
566
568
  end
567
569
 
568
570
  def object_streams
569
- @object_stream ||= {}
571
+ @object_streams ||= {}
570
572
  end
571
573
 
572
574
  # returns an array of object references for all pages in this object store. The ordering of
@@ -591,18 +593,18 @@ class PDF::Reader
591
593
 
592
594
  def read_version
593
595
  @io.seek(0)
594
- _m, version = *@io.read(10).match(/PDF-(\d.\d)/)
596
+ _m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
595
597
  @io.seek(0)
596
598
  version.to_f
597
599
  end
598
600
 
599
601
  def extract_io_from(input)
600
- if input.respond_to?(:seek) && input.respond_to?(:read)
602
+ if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
601
603
  input
602
604
  elsif File.file?(input.to_s)
603
- StringIO.new read_as_binary(input)
605
+ StringIO.new read_as_binary(input.to_s)
604
606
  else
605
- raise ArgumentError, "input must be an IO-like object or a filename"
607
+ raise ArgumentError, "input must be an IO-like object or a filename (#{input.class})"
606
608
  end
607
609
  end
608
610
 
@@ -610,7 +612,7 @@ class PDF::Reader
610
612
  if File.respond_to?(:binread)
611
613
  File.binread(input.to_s)
612
614
  else
613
- File.open(input.to_s,"rb") { |f| f.read }
615
+ File.open(input.to_s,"rb") { |f| f.read } || ""
614
616
  end
615
617
  end
616
618
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -1,6 +1,6 @@
1
- # typed: true
2
1
  # coding: utf-8
3
2
  # frozen_string_literal: true
3
+ # typed: strict
4
4
 
5
5
  class PDF::Reader
6
6
  # remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -43,10 +43,10 @@ module PDF
43
43
  #
44
44
  def initialize(objects, pagenum, options = {})
45
45
  @objects, @pagenum = objects, pagenum
46
- @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1]) || {}
47
47
  @cache = options[:cache] || {}
48
48
 
49
- unless @page_object.is_a?(::Hash)
49
+ if @page_object.empty?
50
50
  raise InvalidPageError, "Invalid page: #{pagenum}"
51
51
  end
52
52
  end
@@ -250,8 +250,8 @@ module PDF
250
250
  params = []
251
251
 
252
252
  while (token = parser.parse_token(PagesStrategy::OPERATORS))
253
- if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
254
- callback(receivers, PagesStrategy::OPERATORS[token], params)
253
+ if token.kind_of?(Token) && method_name = PagesStrategy::OPERATORS[token]
254
+ callback(receivers, method_name, params)
255
255
  params.clear
256
256
  else
257
257
  params << token
@@ -263,9 +263,26 @@ module PDF
263
263
 
264
264
  # calls the name callback method on each receiver object with params as the arguments
265
265
  #
266
+ # The silly style here is because sorbet won't let me use splat arguments
267
+ #
266
268
  def callback(receivers, name, params=[])
267
269
  receivers.each do |receiver|
268
- receiver.send(name, *params) if receiver.respond_to?(name)
270
+ if receiver.respond_to?(name)
271
+ case params.size
272
+ when 0 then receiver.send(name)
273
+ when 1 then receiver.send(name, params[0])
274
+ when 2 then receiver.send(name, params[0], params[1])
275
+ when 3 then receiver.send(name, params[0], params[1], params[2])
276
+ when 4 then receiver.send(name, params[0], params[1], params[2], params[3])
277
+ when 5 then receiver.send(name, params[0], params[1], params[2], params[3], params[4])
278
+ when 6 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5])
279
+ when 7 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6])
280
+ when 8 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7])
281
+ when 9 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8])
282
+ else
283
+ receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8], params[9])
284
+ end
285
+ end
269
286
  end
270
287
  end
271
288
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'pdf/reader/overlapping_runs_filter'
@@ -62,6 +62,8 @@ module PDF
62
62
  runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
63
63
  end
64
64
 
65
+ runs = NoTextFilter.exclude_empty_strings(runs)
66
+
65
67
  if opts.fetch(:merge, true)
66
68
  runs = merge_runs(runs)
67
69
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -31,7 +31,8 @@ class PDF::Reader
31
31
  ################################################################################
32
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
33
33
  class Reference
34
- attr_reader :id, :gen
34
+ attr_reader :id
35
+ attr_reader :gen
35
36
  ################################################################################
36
37
  # Create a new Reference to an object with the specified id and revision number
37
38
  def initialize(id, gen)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -7,7 +7,10 @@ class PDF::Reader
7
7
  class TextRun
8
8
  include Comparable
9
9
 
10
- attr_reader :origin, :width, :font_size, :text
10
+ attr_reader :origin
11
+ attr_reader :width
12
+ attr_reader :font_size
13
+ attr_reader :text
11
14
 
12
15
  alias :to_s :text
13
16
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -51,7 +51,7 @@ class PDF::Reader
51
51
  # displacement to speed up processing documents that use vertical
52
52
  # writing systems
53
53
  #
54
- def multiply!(a,b=nil,c=nil, d=nil,e=nil,f=nil)
54
+ def multiply!(a,b,c, d,e,f)
55
55
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
56
56
  # the identity matrix, no effect
57
57
  self
@@ -164,12 +164,12 @@ class PDF::Reader
164
164
  # [ e f 1 ] [ e f 1 ]
165
165
  #
166
166
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
167
- newa = (@a * a2) + (@b * c2) + (0 * e2)
168
- newb = (@a * b2) + (@b * d2) + (0 * f2)
169
- newc = (@c * a2) + (@d * c2) + (0 * e2)
170
- newd = (@c * b2) + (@d * d2) + (0 * f2)
171
- newe = (@e * a2) + (@f * c2) + (1 * e2)
172
- newf = (@e * b2) + (@f * d2) + (1 * f2)
167
+ newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
+ newb = (@a * b2) + (@b * d2) + (f2 * 0)
169
+ newc = (@c * a2) + (@d * c2) + (e2 * 0)
170
+ newd = (@c * b2) + (@d * d2) + (f2 * 0)
171
+ newe = (@e * a2) + (@f * c2) + (e2 * 1)
172
+ newf = (@e * b2) + (@f * d2) + (f2 * 1)
173
173
  @a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
174
174
  end
175
175
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
@@ -73,7 +73,7 @@ class PDF::Reader
73
73
  #
74
74
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
75
75
  def [](ref)
76
- @xref[ref.id][ref.gen]
76
+ @xref.fetch(ref.id, {}).fetch(ref.gen)
77
77
  rescue
78
78
  raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
79
79
  end
@@ -82,8 +82,8 @@ class PDF::Reader
82
82
  def each(&block)
83
83
  ids = @xref.keys.sort
84
84
  ids.each do |id|
85
- gen = @xref[id].keys.sort[-1]
86
- yield PDF::Reader::Reference.new(id, gen)
85
+ gen = @xref.fetch(id, {}).keys.sort[-1]
86
+ yield PDF::Reader::Reference.new(id, gen.to_i)
87
87
  end
88
88
  end
89
89
  ################################################################################
data/lib/pdf/reader.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -128,7 +128,7 @@ module PDF
128
128
  doc_strings_to_utf8(dict)
129
129
  end
130
130
 
131
- # Return a Hash with extra metadata provided by the author of the PDF file. Not
131
+ # Return a String with extra XML metadata provided by the author of the PDF file. Not
132
132
  # always present.
133
133
  #
134
134
  def metadata
@@ -182,7 +182,7 @@ module PDF
182
182
  #
183
183
  # reader.pages.each do |page|
184
184
  # puts page.fonts
185
- # puts page.images
185
+ # puts page.rectangles
186
186
  # puts page.text
187
187
  # end
188
188
  #
@@ -272,13 +272,7 @@ module PDF
272
272
  end
273
273
 
274
274
  def root
275
- @root ||= begin
276
- obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
277
- unless obj.kind_of?(::Hash)
278
- raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
279
- end
280
- obj
281
- end
275
+ @root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
282
276
  end
283
277
 
284
278
  end
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
315
309
  require 'pdf/reader/rectangle'
316
310
  require 'pdf/reader/reference'
317
311
  require 'pdf/reader/register_receiver'
312
+ require 'pdf/reader/no_text_filter'
318
313
  require 'pdf/reader/null_security_handler'
319
314
  require 'pdf/reader/security_handler_factory'
320
315
  require 'pdf/reader/standard_key_builder'