pdf-reader 2.11.0 → 2.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
4
- data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
3
+ metadata.gz: e3b00946c8b23b65d19ace187550b15bb3fd2537e518c778f4c12da28672c9d8
4
+ data.tar.gz: 4c2ebeb19dada9f257fa65c2add2f2f6d64f011cb13e997533a4b63fc81baa6d
5
5
  SHA512:
6
- metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
7
- data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
6
+ metadata.gz: 99c9ac879424056221f616d7f7299d03dfc9906c6b81c333ad255439780cf56d2dfc0c31a62347a7a163bcdb4075f8d0c914e2deeebb5d78e8ebc34e19cd7abc
7
+ data.tar.gz: 50ef8b5e1061dd1d6b24a7727b5537664bcb22473757274b4cc2b92c89b9ba5ea7516f055571f5c8b72d678f7cef549858631408c86a6984196ba7d1773daaca
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v2.12.0 (26th December 2023)
2
+ - Fix a sorbet method signature (http://github.com/yob/pdf-reader/pull/512)
3
+ - Reduce allocations when parsing PDFs with hex strings (http://github.com/yob/pdf-reader/pull/528)
4
+ - Fix text extraction of some rare unicode codepoints (http://github.com/yob/pdf-reader/pull/529)
5
+
1
6
  v2.11.0 (26th October 2022)
2
7
  - Various bug fixes
3
8
  - Expanded sorbet type annotations
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -300,13 +300,12 @@ class PDF::Reader
300
300
  # we find a closing >
301
301
  #
302
302
  def prepare_hex_token
303
- finished = :false
304
303
  str = "".dup
305
304
 
306
- until finished == :true
305
+ loop do
307
306
  byte = @io.getbyte
308
307
  if byte.nil?
309
- finished = :true # unbalanced params
308
+ break
310
309
  elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
311
310
  str << byte
312
311
  elsif byte <= 32
@@ -315,7 +314,7 @@ class PDF::Reader
315
314
  @tokens << str if str.size > 0
316
315
  @tokens << ">" if byte != 0x3E # '>'
317
316
  @tokens << byte.chr
318
- finished = :true
317
+ break
319
318
  end
320
319
  end
321
320
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -118,8 +118,8 @@ class PDF::Reader
118
118
  result = []
119
119
  while unpacked_string.any? do
120
120
  if unpacked_string.size >= 2 &&
121
- unpacked_string.first.to_i > 0xD800 &&
122
- unpacked_string.first.to_i < 0xDBFF
121
+ unpacked_string.first.to_i >= 0xD800 &&
122
+ unpacked_string.first.to_i <= 0xDBFF
123
123
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
124
124
  # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
125
125
  # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -119,7 +119,7 @@ class PDF::Reader
119
119
  # => [:A]
120
120
  #
121
121
  def int_to_name(glyph_code)
122
- if @enc_name == "Identity-H" || @enc_name == "Identity-V"
122
+ if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
123
123
  []
124
124
  elsif differences[glyph_code]
125
125
  [differences[glyph_code]]
@@ -143,7 +143,6 @@ class PDF::Reader
143
143
  CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
144
144
  }
145
145
  mapping = Hash[tuples]
146
- mapping[nil] = UNKNOWN_CHAR
147
146
  mapping
148
147
  end
149
148
 
@@ -82,8 +82,8 @@ class PDF::Reader
82
82
  glyph_width_in_glyph_space = glyph_width(code_point)
83
83
 
84
84
  if @subtype == :Type3
85
- x1, y1 = font_matrix_transform(0,0)
86
- x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
85
+ x1, _y1 = font_matrix_transform(0,0)
86
+ x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
87
  (x2 - x1).abs.round(2)
88
88
  else
89
89
  glyph_width_in_glyph_space / 1000.0
@@ -42,7 +42,7 @@ module PDF
42
42
  while bits_left_in_chunk > 0 and @current_pos < @data.size
43
43
  chunk = 0 if chunk < 0
44
44
  codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
45
- current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
45
+ current_byte = codepoint & (2**@bits_left_in_byte - 1).to_i #clear consumed bits
46
46
  dif = bits_left_in_chunk - @bits_left_in_byte
47
47
  if dif > 0 then current_byte <<= dif
48
48
  elsif dif < 0 then current_byte >>= dif.abs
@@ -612,7 +612,7 @@ class PDF::Reader
612
612
  if File.respond_to?(:binread)
613
613
  File.binread(input.to_s)
614
614
  else
615
- File.open(input.to_s,"rb") { |f| f.read } || ""
615
+ File.open(input.to_s,"rb") { |f| f.read }
616
616
  end
617
617
  end
618
618
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -173,9 +173,7 @@ class PDF::Reader
173
173
 
174
174
  # add a missing digit if required, as required by the spec
175
175
  str << "0" unless str.size % 2 == 0
176
- str.chars.each_slice(2).map { |nibbles|
177
- nibbles.join("").hex.chr
178
- }.join.force_encoding("binary")
176
+ [str].pack('H*')
179
177
  end
180
178
  ################################################################################
181
179
  # Reads a PDF String from the buffer and converts it to a Ruby String
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: strict
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
data/rbi/pdf-reader.rbi CHANGED
@@ -842,7 +842,7 @@ module PDF
842
842
  sig { params(runs: T::Array[PDF::Reader::TextRun]).returns(T::Array[PDF::Reader::TextRun]) }
843
843
  def self.exclude_redundant_runs(runs); end
844
844
 
845
- sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: EventPoint).returns(T::Boolean) }
845
+ sig { params(sweep_line_status: T::Array[PDF::Reader::TextRun], event_point: PDF::Reader::EventPoint).returns(T::Boolean) }
846
846
  def self.detect_intersection(sweep_line_status, event_point); end
847
847
  end
848
848
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.11.0
4
+ version: 2.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-26 00:00:00.000000000 Z
11
+ date: 2023-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -289,9 +289,9 @@ licenses:
289
289
  - MIT
290
290
  metadata:
291
291
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
292
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
293
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
294
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
292
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.12.0/CHANGELOG
293
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.12.0
294
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.12.0
295
295
  post_install_message:
296
296
  rdoc_options:
297
297
  - "--title"
@@ -312,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
312
312
  - !ruby/object:Gem::Version
313
313
  version: '0'
314
314
  requirements: []
315
- rubygems_version: 3.2.32
315
+ rubygems_version: 3.4.10
316
316
  signing_key:
317
317
  specification_version: 4
318
318
  summary: A library for accessing the content of PDF files