pdf-reader 2.11.0 → 2.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -0
- data/lib/pdf/reader/buffer.rb +4 -5
- data/lib/pdf/reader/cmap.rb +3 -3
- data/lib/pdf/reader/encoding.rb +2 -3
- data/lib/pdf/reader/font.rb +2 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +1 -1
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +1 -3
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -1
- data/rbi/pdf-reader.rbi +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3b00946c8b23b65d19ace187550b15bb3fd2537e518c778f4c12da28672c9d8
|
4
|
+
data.tar.gz: 4c2ebeb19dada9f257fa65c2add2f2f6d64f011cb13e997533a4b63fc81baa6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99c9ac879424056221f616d7f7299d03dfc9906c6b81c333ad255439780cf56d2dfc0c31a62347a7a163bcdb4075f8d0c914e2deeebb5d78e8ebc34e19cd7abc
|
7
|
+
data.tar.gz: 50ef8b5e1061dd1d6b24a7727b5537664bcb22473757274b4cc2b92c89b9ba5ea7516f055571f5c8b72d678f7cef549858631408c86a6984196ba7d1773daaca
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
v2.12.0 (26th December 2023)
|
2
|
+
- Fix a sorbet method signature (http://github.com/yob/pdf-reader/pull/512)
|
3
|
+
- Reduce allocations when parsing PDFs with hex strings (http://github.com/yob/pdf-reader/pull/528)
|
4
|
+
- Fix text extraction of some rare unicode codepoints (http://github.com/yob/pdf-reader/pull/529)
|
5
|
+
|
1
6
|
v2.11.0 (26th October 2022)
|
2
7
|
- Various bug fixes
|
3
8
|
- Expanded sorbet type annotations
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: ASCII-8BIT
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -300,13 +300,12 @@ class PDF::Reader
|
|
300
300
|
# we find a closing >
|
301
301
|
#
|
302
302
|
def prepare_hex_token
|
303
|
-
finished = :false
|
304
303
|
str = "".dup
|
305
304
|
|
306
|
-
|
305
|
+
loop do
|
307
306
|
byte = @io.getbyte
|
308
307
|
if byte.nil?
|
309
|
-
|
308
|
+
break
|
310
309
|
elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
|
311
310
|
str << byte
|
312
311
|
elsif byte <= 32
|
@@ -315,7 +314,7 @@ class PDF::Reader
|
|
315
314
|
@tokens << str if str.size > 0
|
316
315
|
@tokens << ">" if byte != 0x3E # '>'
|
317
316
|
@tokens << byte.chr
|
318
|
-
|
317
|
+
break
|
319
318
|
end
|
320
319
|
end
|
321
320
|
end
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -118,8 +118,8 @@ class PDF::Reader
|
|
118
118
|
result = []
|
119
119
|
while unpacked_string.any? do
|
120
120
|
if unpacked_string.size >= 2 &&
|
121
|
-
unpacked_string.first.to_i
|
122
|
-
unpacked_string.first.to_i
|
121
|
+
unpacked_string.first.to_i >= 0xD800 &&
|
122
|
+
unpacked_string.first.to_i <= 0xDBFF
|
123
123
|
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
124
124
|
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
125
125
|
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -119,7 +119,7 @@ class PDF::Reader
|
|
119
119
|
# => [:A]
|
120
120
|
#
|
121
121
|
def int_to_name(glyph_code)
|
122
|
-
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
122
|
+
if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
123
123
|
[]
|
124
124
|
elsif differences[glyph_code]
|
125
125
|
[differences[glyph_code]]
|
@@ -143,7 +143,6 @@ class PDF::Reader
|
|
143
143
|
CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
|
144
144
|
}
|
145
145
|
mapping = Hash[tuples]
|
146
|
-
mapping[nil] = UNKNOWN_CHAR
|
147
146
|
mapping
|
148
147
|
end
|
149
148
|
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -82,8 +82,8 @@ class PDF::Reader
|
|
82
82
|
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
83
|
|
84
84
|
if @subtype == :Type3
|
85
|
-
x1,
|
86
|
-
x2,
|
85
|
+
x1, _y1 = font_matrix_transform(0,0)
|
86
|
+
x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
87
|
(x2 - x1).abs.round(2)
|
88
88
|
else
|
89
89
|
glyph_width_in_glyph_space / 1000.0
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -42,7 +42,7 @@ module PDF
|
|
42
42
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
43
43
|
chunk = 0 if chunk < 0
|
44
44
|
codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
|
45
|
-
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
45
|
+
current_byte = codepoint & (2**@bits_left_in_byte - 1).to_i #clear consumed bits
|
46
46
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
47
47
|
if dif > 0 then current_byte <<= dif
|
48
48
|
elsif dif < 0 then current_byte >>= dif.abs
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -173,9 +173,7 @@ class PDF::Reader
|
|
173
173
|
|
174
174
|
# add a missing digit if required, as required by the spec
|
175
175
|
str << "0" unless str.size % 2 == 0
|
176
|
-
str.
|
177
|
-
nibbles.join("").hex.chr
|
178
|
-
}.join.force_encoding("binary")
|
176
|
+
[str].pack('H*')
|
179
177
|
end
|
180
178
|
################################################################################
|
181
179
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
data/rbi/pdf-reader.rbi
CHANGED
@@ -842,7 +842,7 @@ module PDF
|
|
842
842
|
sig { params(runs: T::Array[PDF::Reader::TextRun]).returns(T::Array[PDF::Reader::TextRun]) }
|
843
843
|
def self.exclude_redundant_runs(runs); end
|
844
844
|
|
845
|
-
sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: EventPoint).returns(T::Boolean) }
|
845
|
+
sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: PDF::Reader::EventPoint).returns(T::Boolean) }
|
846
846
|
def self.detect_intersection(sweep_line_status, event_point); end
|
847
847
|
end
|
848
848
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.0
|
4
|
+
version: 2.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -289,9 +289,9 @@ licenses:
|
|
289
289
|
- MIT
|
290
290
|
metadata:
|
291
291
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
292
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
|
293
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
|
294
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
|
292
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.12.0/CHANGELOG
|
293
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.12.0
|
294
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.12.0
|
295
295
|
post_install_message:
|
296
296
|
rdoc_options:
|
297
297
|
- "--title"
|
@@ -312,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
312
312
|
- !ruby/object:Gem::Version
|
313
313
|
version: '0'
|
314
314
|
requirements: []
|
315
|
-
rubygems_version: 3.
|
315
|
+
rubygems_version: 3.4.10
|
316
316
|
signing_key:
|
317
317
|
specification_version: 4
|
318
318
|
summary: A library for accessing the content of PDF files
|