pdf-reader 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +4 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/buffer.rb +1 -1
- data/lib/pdf/reader/cid_widths.rb +3 -1
- data/lib/pdf/reader/cmap.rb +1 -1
- data/lib/pdf/reader/encoding.rb +4 -4
- data/lib/pdf/reader/filter/depredict.rb +1 -1
- data/lib/pdf/reader/glyph_hash.rb +13 -5
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/object_hash.rb +8 -6
- data/lib/pdf/reader/object_stream.rb +1 -1
- data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
- data/lib/pdf/reader/page.rb +23 -6
- data/lib/pdf/reader/page_layout.rb +1 -1
- data/lib/pdf/reader/page_text_receiver.rb +2 -0
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/reference.rb +3 -2
- data/lib/pdf/reader/resources.rb +1 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -1
- data/lib/pdf/reader/text_run.rb +5 -2
- data/lib/pdf/reader/transformation_matrix.rb +8 -8
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -1
- data/lib/pdf/reader/xref.rb +3 -3
- data/lib/pdf/reader.rb +5 -10
- data/rbi/pdf-reader.rbi +276 -253
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
|
4
|
+
data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
|
7
|
+
data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
|
data/CHANGELOG
CHANGED
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
|
|
14
14
|
Cane::RakeTask.new(:quality) do |cane|
|
15
15
|
cane.abc_max = 20
|
16
16
|
cane.style_measure = 100
|
17
|
-
cane.max_violations =
|
17
|
+
cane.max_violations = 33
|
18
18
|
|
19
19
|
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
20
20
|
end
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -52,7 +52,9 @@ class PDF::Reader
|
|
52
52
|
|
53
53
|
# this is the form 10 20 123 where all index between 10 and 20 have width 123
|
54
54
|
def parse_second_form(first, final, width)
|
55
|
-
|
55
|
+
if first > final
|
56
|
+
raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
|
57
|
+
end
|
56
58
|
|
57
59
|
(first..final).inject({}) { |accum, index|
|
58
60
|
accum[index] = width
|
data/lib/pdf/reader/cmap.rb
CHANGED
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -76,9 +76,9 @@ class PDF::Reader
|
|
76
76
|
diff.each do |val|
|
77
77
|
if val.kind_of?(Numeric)
|
78
78
|
byte = val.to_i
|
79
|
-
|
79
|
+
elsif codepoint = glyphlist.name_to_unicode(val)
|
80
80
|
@differences[byte] = val
|
81
|
-
@mapping[byte] =
|
81
|
+
@mapping[byte] = codepoint
|
82
82
|
byte += 1
|
83
83
|
end
|
84
84
|
end
|
@@ -167,7 +167,7 @@ class PDF::Reader
|
|
167
167
|
end
|
168
168
|
|
169
169
|
def convert_to_utf8(str)
|
170
|
-
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
170
|
+
ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
|
171
171
|
ret.force_encoding("UTF-8")
|
172
172
|
ret
|
173
173
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -33,10 +33,18 @@ class PDF::Reader
|
|
33
33
|
#
|
34
34
|
class GlyphHash # :nodoc:
|
35
35
|
def initialize
|
36
|
+
@@by_codepoint_cache ||= nil
|
37
|
+
@@by_name_cache ||= nil
|
38
|
+
|
36
39
|
# only parse the glyph list once, and cache the results (for performance)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
+
if @@by_codepoint_cache != nil && @@by_name_cache != nil
|
41
|
+
@by_name = @@by_name_cache
|
42
|
+
@by_codepoint = @@by_codepoint_cache
|
43
|
+
else
|
44
|
+
by_name, by_codepoint = load_adobe_glyph_mapping
|
45
|
+
@by_name = @@by_name_cache ||= by_name
|
46
|
+
@by_codepoint = @@by_codepoint_cache ||= by_codepoint
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
# attempt to convert a PDF Name to a unicode codepoint. Returns nil
|
@@ -127,7 +135,7 @@ class PDF::Reader
|
|
127
135
|
end
|
128
136
|
end
|
129
137
|
|
130
|
-
|
138
|
+
return keyed_by_name.freeze, keyed_by_codepoint.freeze
|
131
139
|
end
|
132
140
|
|
133
141
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
# There's no point rendering zero-width characters
|
7
|
+
class NoTextFilter
|
8
|
+
|
9
|
+
def self.exclude_empty_strings(runs)
|
10
|
+
runs.reject { |run| run.text.to_s.size == 0 }
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
|
+
require 'tempfile'
|
6
|
+
|
5
7
|
class PDF::Reader
|
6
8
|
# Provides low level access to the objects in a PDF file via a hash-like
|
7
9
|
# object.
|
@@ -566,7 +568,7 @@ class PDF::Reader
|
|
566
568
|
end
|
567
569
|
|
568
570
|
def object_streams
|
569
|
-
@
|
571
|
+
@object_streams ||= {}
|
570
572
|
end
|
571
573
|
|
572
574
|
# returns an array of object references for all pages in this object store. The ordering of
|
@@ -591,18 +593,18 @@ class PDF::Reader
|
|
591
593
|
|
592
594
|
def read_version
|
593
595
|
@io.seek(0)
|
594
|
-
_m, version = *@io.read(10).match(/PDF-(\d.\d)/)
|
596
|
+
_m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
|
595
597
|
@io.seek(0)
|
596
598
|
version.to_f
|
597
599
|
end
|
598
600
|
|
599
601
|
def extract_io_from(input)
|
600
|
-
if input.
|
602
|
+
if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
|
601
603
|
input
|
602
604
|
elsif File.file?(input.to_s)
|
603
|
-
StringIO.new read_as_binary(input)
|
605
|
+
StringIO.new read_as_binary(input.to_s)
|
604
606
|
else
|
605
|
-
raise ArgumentError, "input must be an IO-like object or a filename"
|
607
|
+
raise ArgumentError, "input must be an IO-like object or a filename (#{input.class})"
|
606
608
|
end
|
607
609
|
end
|
608
610
|
|
@@ -610,7 +612,7 @@ class PDF::Reader
|
|
610
612
|
if File.respond_to?(:binread)
|
611
613
|
File.binread(input.to_s)
|
612
614
|
else
|
613
|
-
File.open(input.to_s,"rb") { |f| f.read }
|
615
|
+
File.open(input.to_s,"rb") { |f| f.read } || ""
|
614
616
|
end
|
615
617
|
end
|
616
618
|
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
module PDF
|
@@ -43,10 +43,10 @@ module PDF
|
|
43
43
|
#
|
44
44
|
def initialize(objects, pagenum, options = {})
|
45
45
|
@objects, @pagenum = objects, pagenum
|
46
|
-
@page_object = objects.deref_hash(objects.page_references[pagenum - 1])
|
46
|
+
@page_object = objects.deref_hash(objects.page_references[pagenum - 1]) || {}
|
47
47
|
@cache = options[:cache] || {}
|
48
48
|
|
49
|
-
|
49
|
+
if @page_object.empty?
|
50
50
|
raise InvalidPageError, "Invalid page: #{pagenum}"
|
51
51
|
end
|
52
52
|
end
|
@@ -250,8 +250,8 @@ module PDF
|
|
250
250
|
params = []
|
251
251
|
|
252
252
|
while (token = parser.parse_token(PagesStrategy::OPERATORS))
|
253
|
-
if token.kind_of?(Token)
|
254
|
-
callback(receivers,
|
253
|
+
if token.kind_of?(Token) && method_name = PagesStrategy::OPERATORS[token]
|
254
|
+
callback(receivers, method_name, params)
|
255
255
|
params.clear
|
256
256
|
else
|
257
257
|
params << token
|
@@ -263,9 +263,26 @@ module PDF
|
|
263
263
|
|
264
264
|
# calls the name callback method on each receiver object with params as the arguments
|
265
265
|
#
|
266
|
+
# The silly style here is because sorbet won't let me use splat arguments
|
267
|
+
#
|
266
268
|
def callback(receivers, name, params=[])
|
267
269
|
receivers.each do |receiver|
|
268
|
-
|
270
|
+
if receiver.respond_to?(name)
|
271
|
+
case params.size
|
272
|
+
when 0 then receiver.send(name)
|
273
|
+
when 1 then receiver.send(name, params[0])
|
274
|
+
when 2 then receiver.send(name, params[0], params[1])
|
275
|
+
when 3 then receiver.send(name, params[0], params[1], params[2])
|
276
|
+
when 4 then receiver.send(name, params[0], params[1], params[2], params[3])
|
277
|
+
when 5 then receiver.send(name, params[0], params[1], params[2], params[3], params[4])
|
278
|
+
when 6 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5])
|
279
|
+
when 7 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6])
|
280
|
+
when 8 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7])
|
281
|
+
when 9 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8])
|
282
|
+
else
|
283
|
+
receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8], params[9])
|
284
|
+
end
|
285
|
+
end
|
269
286
|
end
|
270
287
|
end
|
271
288
|
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -31,7 +31,8 @@ class PDF::Reader
|
|
31
31
|
################################################################################
|
32
32
|
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
33
33
|
class Reference
|
34
|
-
attr_reader :id
|
34
|
+
attr_reader :id
|
35
|
+
attr_reader :gen
|
35
36
|
################################################################################
|
36
37
|
# Create a new Reference to an object with the specified id and revision number
|
37
38
|
def initialize(id, gen)
|
data/lib/pdf/reader/resources.rb
CHANGED
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -7,7 +7,10 @@ class PDF::Reader
|
|
7
7
|
class TextRun
|
8
8
|
include Comparable
|
9
9
|
|
10
|
-
attr_reader :origin
|
10
|
+
attr_reader :origin
|
11
|
+
attr_reader :width
|
12
|
+
attr_reader :font_size
|
13
|
+
attr_reader :text
|
11
14
|
|
12
15
|
alias :to_s :text
|
13
16
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -51,7 +51,7 @@ class PDF::Reader
|
|
51
51
|
# displacement to speed up processing documents that use vertical
|
52
52
|
# writing systems
|
53
53
|
#
|
54
|
-
def multiply!(a,b
|
54
|
+
def multiply!(a,b,c, d,e,f)
|
55
55
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
56
56
|
# the identity matrix, no effect
|
57
57
|
self
|
@@ -164,12 +164,12 @@ class PDF::Reader
|
|
164
164
|
# [ e f 1 ] [ e f 1 ]
|
165
165
|
#
|
166
166
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
167
|
-
newa = (@a * a2) + (@b * c2) + (
|
168
|
-
newb = (@a * b2) + (@b * d2) + (
|
169
|
-
newc = (@c * a2) + (@d * c2) + (
|
170
|
-
newd = (@c * b2) + (@d * d2) + (
|
171
|
-
newe = (@e * a2) + (@f * c2) + (
|
172
|
-
newf = (@e * b2) + (@f * d2) + (
|
167
|
+
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
|
+
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
169
|
+
newc = (@c * a2) + (@d * c2) + (e2 * 0)
|
170
|
+
newd = (@c * b2) + (@d * d2) + (f2 * 0)
|
171
|
+
newe = (@e * a2) + (@f * c2) + (e2 * 1)
|
172
|
+
newf = (@e * b2) + (@f * d2) + (f2 * 1)
|
173
173
|
@a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
|
174
174
|
end
|
175
175
|
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -73,7 +73,7 @@ class PDF::Reader
|
|
73
73
|
#
|
74
74
|
# ref - a PDF::Reader::Reference object containing an object ID and revision number
|
75
75
|
def [](ref)
|
76
|
-
@xref
|
76
|
+
@xref.fetch(ref.id, {}).fetch(ref.gen)
|
77
77
|
rescue
|
78
78
|
raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
|
79
79
|
end
|
@@ -82,8 +82,8 @@ class PDF::Reader
|
|
82
82
|
def each(&block)
|
83
83
|
ids = @xref.keys.sort
|
84
84
|
ids.each do |id|
|
85
|
-
gen = @xref
|
86
|
-
yield PDF::Reader::Reference.new(id, gen)
|
85
|
+
gen = @xref.fetch(id, {}).keys.sort[-1]
|
86
|
+
yield PDF::Reader::Reference.new(id, gen.to_i)
|
87
87
|
end
|
88
88
|
end
|
89
89
|
################################################################################
|
data/lib/pdf/reader.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -128,7 +128,7 @@ module PDF
|
|
128
128
|
doc_strings_to_utf8(dict)
|
129
129
|
end
|
130
130
|
|
131
|
-
# Return a
|
131
|
+
# Return a String with extra XML metadata provided by the author of the PDF file. Not
|
132
132
|
# always present.
|
133
133
|
#
|
134
134
|
def metadata
|
@@ -182,7 +182,7 @@ module PDF
|
|
182
182
|
#
|
183
183
|
# reader.pages.each do |page|
|
184
184
|
# puts page.fonts
|
185
|
-
# puts page.
|
185
|
+
# puts page.rectangles
|
186
186
|
# puts page.text
|
187
187
|
# end
|
188
188
|
#
|
@@ -272,13 +272,7 @@ module PDF
|
|
272
272
|
end
|
273
273
|
|
274
274
|
def root
|
275
|
-
@root ||=
|
276
|
-
obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
|
277
|
-
unless obj.kind_of?(::Hash)
|
278
|
-
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
279
|
-
end
|
280
|
-
obj
|
281
|
-
end
|
275
|
+
@root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
|
282
276
|
end
|
283
277
|
|
284
278
|
end
|
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
|
|
315
309
|
require 'pdf/reader/rectangle'
|
316
310
|
require 'pdf/reader/reference'
|
317
311
|
require 'pdf/reader/register_receiver'
|
312
|
+
require 'pdf/reader/no_text_filter'
|
318
313
|
require 'pdf/reader/null_security_handler'
|
319
314
|
require 'pdf/reader/security_handler_factory'
|
320
315
|
require 'pdf/reader/standard_key_builder'
|