pdf-reader 2.11.0 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
4
- data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
3
+ metadata.gz: e3b00946c8b23b65d19ace187550b15bb3fd2537e518c778f4c12da28672c9d8
4
+ data.tar.gz: 4c2ebeb19dada9f257fa65c2add2f2f6d64f011cb13e997533a4b63fc81baa6d
5
5
  SHA512:
6
- metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
7
- data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
6
+ metadata.gz: 99c9ac879424056221f616d7f7299d03dfc9906c6b81c333ad255439780cf56d2dfc0c31a62347a7a163bcdb4075f8d0c914e2deeebb5d78e8ebc34e19cd7abc
7
+ data.tar.gz: 50ef8b5e1061dd1d6b24a7727b5537664bcb22473757274b4cc2b92c89b9ba5ea7516f055571f5c8b72d678f7cef549858631408c86a6984196ba7d1773daaca
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v2.12.0 (26th December 2023)
2
+ - Fix a sorbet method signature (http://github.com/yob/pdf-reader/pull/512)
3
+ - Reduce allocations when parsing PDFs with hex strings (http://github.com/yob/pdf-reader/pull/528)
4
+ - Fix text extraction of some rare unicode codepoints (http://github.com/yob/pdf-reader/pull/529)
5
+
1
6
  v2.11.0 (26th October 2022)
2
7
  - Various bug fixes
3
8
  - Expanded sorbet type annotations
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -300,13 +300,12 @@ class PDF::Reader
300
300
  # we find a closing >
301
301
  #
302
302
  def prepare_hex_token
303
- finished = :false
304
303
  str = "".dup
305
304
 
306
- until finished == :true
305
+ loop do
307
306
  byte = @io.getbyte
308
307
  if byte.nil?
309
- finished = :true # unbalanced params
308
+ break
310
309
  elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
311
310
  str << byte
312
311
  elsif byte <= 32
@@ -315,7 +314,7 @@ class PDF::Reader
315
314
  @tokens << str if str.size > 0
316
315
  @tokens << ">" if byte != 0x3E # '>'
317
316
  @tokens << byte.chr
318
- finished = :true
317
+ break
319
318
  end
320
319
  end
321
320
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -118,8 +118,8 @@ class PDF::Reader
118
118
  result = []
119
119
  while unpacked_string.any? do
120
120
  if unpacked_string.size >= 2 &&
121
- unpacked_string.first.to_i > 0xD800 &&
122
- unpacked_string.first.to_i < 0xDBFF
121
+ unpacked_string.first.to_i >= 0xD800 &&
122
+ unpacked_string.first.to_i <= 0xDBFF
123
123
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
124
124
  # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
125
125
  # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -119,7 +119,7 @@ class PDF::Reader
119
119
  # => [:A]
120
120
  #
121
121
  def int_to_name(glyph_code)
122
- if @enc_name == "Identity-H" || @enc_name == "Identity-V"
122
+ if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
123
123
  []
124
124
  elsif differences[glyph_code]
125
125
  [differences[glyph_code]]
@@ -143,7 +143,6 @@ class PDF::Reader
143
143
  CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
144
144
  }
145
145
  mapping = Hash[tuples]
146
- mapping[nil] = UNKNOWN_CHAR
147
146
  mapping
148
147
  end
149
148
 
@@ -82,8 +82,8 @@ class PDF::Reader
82
82
  glyph_width_in_glyph_space = glyph_width(code_point)
83
83
 
84
84
  if @subtype == :Type3
85
- x1, y1 = font_matrix_transform(0,0)
86
- x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
85
+ x1, _y1 = font_matrix_transform(0,0)
86
+ x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
87
  (x2 - x1).abs.round(2)
88
88
  else
89
89
  glyph_width_in_glyph_space / 1000.0
@@ -42,7 +42,7 @@ module PDF
42
42
  while bits_left_in_chunk > 0 and @current_pos < @data.size
43
43
  chunk = 0 if chunk < 0
44
44
  codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
45
- current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
45
+ current_byte = codepoint & (2**@bits_left_in_byte - 1).to_i #clear consumed bits
46
46
  dif = bits_left_in_chunk - @bits_left_in_byte
47
47
  if dif > 0 then current_byte <<= dif
48
48
  elsif dif < 0 then current_byte >>= dif.abs
@@ -612,7 +612,7 @@ class PDF::Reader
612
612
  if File.respond_to?(:binread)
613
613
  File.binread(input.to_s)
614
614
  else
615
- File.open(input.to_s,"rb") { |f| f.read } || ""
615
+ File.open(input.to_s,"rb") { |f| f.read }
616
616
  end
617
617
  end
618
618
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -173,9 +173,7 @@ class PDF::Reader
173
173
 
174
174
  # add a missing digit if required, as required by the spec
175
175
  str << "0" unless str.size % 2 == 0
176
- str.chars.each_slice(2).map { |nibbles|
177
- nibbles.join("").hex.chr
178
- }.join.force_encoding("binary")
176
+ [str].pack('H*')
179
177
  end
180
178
  ################################################################################
181
179
  # Reads a PDF String from the buffer and converts it to a Ruby String
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
data/rbi/pdf-reader.rbi CHANGED
@@ -842,7 +842,7 @@ module PDF
842
842
  sig { params(runs: T::Array[PDF::Reader::TextRun]).returns(T::Array[PDF::Reader::TextRun]) }
843
843
  def self.exclude_redundant_runs(runs); end
844
844
 
845
- sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: EventPoint).returns(T::Boolean) }
845
+ sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: PDF::Reader::EventPoint).returns(T::Boolean) }
846
846
  def self.detect_intersection(sweep_line_status, event_point); end
847
847
  end
848
848
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.11.0
4
+ version: 2.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-26 00:00:00.000000000 Z
11
+ date: 2023-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -289,9 +289,9 @@ licenses:
289
289
  - MIT
290
290
  metadata:
291
291
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
292
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
293
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
294
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
292
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.12.0/CHANGELOG
293
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.12.0
294
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.12.0
295
295
  post_install_message:
296
296
  rdoc_options:
297
297
  - "--title"
@@ -312,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
312
312
  - !ruby/object:Gem::Version
313
313
  version: '0'
314
314
  requirements: []
315
- rubygems_version: 3.2.32
315
+ rubygems_version: 3.4.10
316
316
  signing_key:
317
317
  specification_version: 4
318
318
  summary: A library for accessing the content of PDF files