pdf-reader 2.11.0 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -0
- data/lib/pdf/reader/buffer.rb +4 -5
- data/lib/pdf/reader/cmap.rb +3 -3
- data/lib/pdf/reader/encoding.rb +2 -3
- data/lib/pdf/reader/font.rb +2 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +1 -1
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +1 -3
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -1
- data/rbi/pdf-reader.rbi +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e3b00946c8b23b65d19ace187550b15bb3fd2537e518c778f4c12da28672c9d8
|
4
|
+
data.tar.gz: 4c2ebeb19dada9f257fa65c2add2f2f6d64f011cb13e997533a4b63fc81baa6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99c9ac879424056221f616d7f7299d03dfc9906c6b81c333ad255439780cf56d2dfc0c31a62347a7a163bcdb4075f8d0c914e2deeebb5d78e8ebc34e19cd7abc
|
7
|
+
data.tar.gz: 50ef8b5e1061dd1d6b24a7727b5537664bcb22473757274b4cc2b92c89b9ba5ea7516f055571f5c8b72d678f7cef549858631408c86a6984196ba7d1773daaca
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
v2.12.0 (26th December 2023)
|
2
|
+
- Fix a sorbet method signature (http://github.com/yob/pdf-reader/pull/512)
|
3
|
+
- Reduce allocations when parsing PDFs with hex strings (http://github.com/yob/pdf-reader/pull/528)
|
4
|
+
- Fix text extraction of some rare unicode codepoints (http://github.com/yob/pdf-reader/pull/529)
|
5
|
+
|
1
6
|
v2.11.0 (26th October 2022)
|
2
7
|
- Various bug fixes
|
3
8
|
- Expanded sorbet type annotations
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: ASCII-8BIT
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -300,13 +300,12 @@ class PDF::Reader
|
|
300
300
|
# we find a closing >
|
301
301
|
#
|
302
302
|
def prepare_hex_token
|
303
|
-
finished = :false
|
304
303
|
str = "".dup
|
305
304
|
|
306
|
-
|
305
|
+
loop do
|
307
306
|
byte = @io.getbyte
|
308
307
|
if byte.nil?
|
309
|
-
|
308
|
+
break
|
310
309
|
elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
|
311
310
|
str << byte
|
312
311
|
elsif byte <= 32
|
@@ -315,7 +314,7 @@ class PDF::Reader
|
|
315
314
|
@tokens << str if str.size > 0
|
316
315
|
@tokens << ">" if byte != 0x3E # '>'
|
317
316
|
@tokens << byte.chr
|
318
|
-
|
317
|
+
break
|
319
318
|
end
|
320
319
|
end
|
321
320
|
end
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -118,8 +118,8 @@ class PDF::Reader
|
|
118
118
|
result = []
|
119
119
|
while unpacked_string.any? do
|
120
120
|
if unpacked_string.size >= 2 &&
|
121
|
-
unpacked_string.first.to_i
|
122
|
-
unpacked_string.first.to_i
|
121
|
+
unpacked_string.first.to_i >= 0xD800 &&
|
122
|
+
unpacked_string.first.to_i <= 0xDBFF
|
123
123
|
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
124
124
|
# lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
|
125
125
|
# low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -119,7 +119,7 @@ class PDF::Reader
|
|
119
119
|
# => [:A]
|
120
120
|
#
|
121
121
|
def int_to_name(glyph_code)
|
122
|
-
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
122
|
+
if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
123
123
|
[]
|
124
124
|
elsif differences[glyph_code]
|
125
125
|
[differences[glyph_code]]
|
@@ -143,7 +143,6 @@ class PDF::Reader
|
|
143
143
|
CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
|
144
144
|
}
|
145
145
|
mapping = Hash[tuples]
|
146
|
-
mapping[nil] = UNKNOWN_CHAR
|
147
146
|
mapping
|
148
147
|
end
|
149
148
|
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -82,8 +82,8 @@ class PDF::Reader
|
|
82
82
|
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
83
|
|
84
84
|
if @subtype == :Type3
|
85
|
-
x1,
|
86
|
-
x2,
|
85
|
+
x1, _y1 = font_matrix_transform(0,0)
|
86
|
+
x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
87
|
(x2 - x1).abs.round(2)
|
88
88
|
else
|
89
89
|
glyph_width_in_glyph_space / 1000.0
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -42,7 +42,7 @@ module PDF
|
|
42
42
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
43
43
|
chunk = 0 if chunk < 0
|
44
44
|
codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
|
45
|
-
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
45
|
+
current_byte = codepoint & (2**@bits_left_in_byte - 1).to_i #clear consumed bits
|
46
46
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
47
47
|
if dif > 0 then current_byte <<= dif
|
48
48
|
elsif dif < 0 then current_byte >>= dif.abs
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -173,9 +173,7 @@ class PDF::Reader
|
|
173
173
|
|
174
174
|
# add a missing digit if required, as required by the spec
|
175
175
|
str << "0" unless str.size % 2 == 0
|
176
|
-
str.
|
177
|
-
nibbles.join("").hex.chr
|
178
|
-
}.join.force_encoding("binary")
|
176
|
+
[str].pack('H*')
|
179
177
|
end
|
180
178
|
################################################################################
|
181
179
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
data/rbi/pdf-reader.rbi
CHANGED
@@ -842,7 +842,7 @@ module PDF
|
|
842
842
|
sig { params(runs: T::Array[PDF::Reader::TextRun]).returns(T::Array[PDF::Reader::TextRun]) }
|
843
843
|
def self.exclude_redundant_runs(runs); end
|
844
844
|
|
845
|
-
sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: EventPoint).returns(T::Boolean) }
|
845
|
+
sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: PDF::Reader::EventPoint).returns(T::Boolean) }
|
846
846
|
def self.detect_intersection(sweep_line_status, event_point); end
|
847
847
|
end
|
848
848
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.0
|
4
|
+
version: 2.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -289,9 +289,9 @@ licenses:
|
|
289
289
|
- MIT
|
290
290
|
metadata:
|
291
291
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
292
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
|
293
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
|
294
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
|
292
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.12.0/CHANGELOG
|
293
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.12.0
|
294
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.12.0
|
295
295
|
post_install_message:
|
296
296
|
rdoc_options:
|
297
297
|
- "--title"
|
@@ -312,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
312
312
|
- !ruby/object:Gem::Version
|
313
313
|
version: '0'
|
314
314
|
requirements: []
|
315
|
-
rubygems_version: 3.
|
315
|
+
rubygems_version: 3.4.10
|
316
316
|
signing_key:
|
317
317
|
specification_version: 4
|
318
318
|
summary: A library for accessing the content of PDF files
|