pdf-reader 2.4.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2b38615953615bfbca1a80ab344f26166377d8c08d2ba2e05badf43c10682415
4
- data.tar.gz: 658b5d05a14300ad056ee31c10ea998533ccb1b91600e8bc9097070605d003ea
3
+ metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
4
+ data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
5
5
  SHA512:
6
- metadata.gz: 210b0bee8c4ac009808555c8ba945f3b17b85af22126ac1440eb9b49d91f542f1974b0984efb22726985f2cf8e03440511ebc4664ac5c4d91a6bddea9a43687e
7
- data.tar.gz: 8fb60cb59dc4430179a4b9ba83d30ae6dc23aa13dbef5e8febe1569311ddf7e531783da7e7dd0a6542f0087748e97898af56d540e7c088832f213b48059aa7d3
6
+ metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
7
+ data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ v2.5.0 (6th June 2021)
2
+ - bump minimum ruby version to 2.0
3
+ - Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
4
+ - Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
5
+ - Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
6
+ - Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
7
+
8
+ v2.4.2 (28th January 2021)
9
+ - relax ASCII85 dependency to allow 1.x
10
+ - improved support for decompressing objects with slightly malformed zlib data
11
+
1
12
  v.2.4.1 (24th September 2020)
2
13
  - Re-vendor font metrics from Adobe to clarify their license
3
14
 
@@ -9,6 +9,7 @@ class PDF::Reader
9
9
  # implementation of the Flate (zlib) stream filter
10
10
  class Flate
11
11
  ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
+ ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
12
13
 
13
14
  def initialize(options = {})
14
15
  @options = options
@@ -17,24 +18,34 @@ class PDF::Reader
17
18
  ################################################################################
18
19
  # Decode the specified data with the Zlib compression algorithm
19
20
  def filter(data)
20
- deflated = nil
21
+ deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
22
+
23
+ if deflated.nil?
24
+ raise MalformedPDFError,
25
+ "Error while inflating a compressed stream (no suitable inflation algorithm found)"
26
+ end
27
+ Depredict.new(@options).filter(deflated)
28
+ end
29
+
30
+ private
31
+
32
+ def zlib_inflate(data)
21
33
  begin
22
- deflated = Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
23
- rescue Zlib::DataError => e
34
+ return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
+ rescue Zlib::DataError
24
36
  # by default, Ruby's Zlib assumes the data it's inflating
25
37
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
26
- # fails, then use a lightly-documented 'feature' to attempt to inflate
27
- # the data as a raw RFC1951 stream.
28
- #
29
- # See
30
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
31
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
38
+ # fails, swallow the exception and attempt to inflate the data as a raw
39
+ # RFC1951 stream.
32
40
  end
33
- Depredict.new(@options).filter(deflated)
34
- rescue Exception => e
35
- # Oops, there was a problem inflating the stream
36
- raise MalformedPDFError,
37
- "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
41
+
42
+ begin
43
+ return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
+ rescue StandardError
45
+ # swallow this one too, so we can try some other fallback options
46
+ end
47
+
48
+ nil
38
49
  end
39
50
  end
40
51
  end
@@ -331,7 +331,9 @@ class PDF::Reader
331
331
  def decrypt(ref, obj)
332
332
  case obj
333
333
  when PDF::Reader::Stream then
334
- obj.data = sec_handler.decrypt(obj.data, ref)
334
+ # PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
335
+ # Therefore we shouldn't try to decrypt it.
336
+ obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
335
337
  obj
336
338
  when Hash then
337
339
  arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
@@ -30,6 +30,7 @@ class PDF::Reader
30
30
 
31
31
  def to_s
32
32
  return "" if @runs.empty?
33
+ return "" if row_count == 0
33
34
 
34
35
  page = row_count.times.map { |i| " " * col_count }
35
36
  @runs.each do |run|
@@ -30,15 +30,7 @@ class PDF::Reader
30
30
  @xobject_stack = [page.xobjects]
31
31
  @cs_stack = [page.color_spaces]
32
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- if page.rotate == 0
34
- state[:ctm] = identity_matrix
35
- else
36
- rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
37
- rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
38
- state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
39
- rotate_sin * -1, rotate_cos,
40
- 0, 0)
41
- end
33
+ state[:ctm] = identity_matrix
42
34
  end
43
35
 
44
36
  #####################################################
@@ -330,11 +322,13 @@ class PDF::Reader
330
322
  th = state[:h_scaling]
331
323
  # optimise the common path to reduce Float allocations
332
324
  if th == 1 && tj == 0 && tc == 0 && tw == 0
333
- glyph_width = w0 * fs
334
- tx = glyph_width
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
335
329
  else
336
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
337
- tx = glyph_width + ((tc + tw) * th)
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
338
332
  end
339
333
 
340
334
  # TODO: I'm pretty sure that tx shouldn't need to be divided by
@@ -41,6 +41,7 @@ module PDF
41
41
  # starting a new page
42
42
  def page=(page)
43
43
  @state = PageState.new(page)
44
+ @page = page
44
45
  @content = []
45
46
  @characters = []
46
47
  @mediabox = page.objects.deref(page.attributes[:MediaBox])
@@ -104,6 +105,8 @@ module PDF
104
105
  glyphs.each_with_index do |glyph_code, index|
105
106
  # paint the current glyph
106
107
  newx, newy = @state.trm_transform(0,0)
108
+ newx, newy = apply_rotation(newx, newy)
109
+
107
110
  utf8_chars = @state.current_font.to_utf8(glyph_code)
108
111
 
109
112
  # apply to glyph displacment for the current glyph so the next
@@ -118,6 +121,21 @@ module PDF
118
121
  end
119
122
  end
120
123
 
124
+ def apply_rotation(x, y)
125
+ if @page.rotate == 90
126
+ tmp = x
127
+ x = y
128
+ y = tmp * -1
129
+ elsif @page.rotate == 180
130
+ y *= -1
131
+ elsif @page.rotate == 270
132
+ tmp = x
133
+ x = y * -1
134
+ y = tmp * -1
135
+ end
136
+ return x, y
137
+ end
138
+
121
139
  end
122
140
  end
123
141
  end
@@ -37,23 +37,15 @@ class PDF::Reader
37
37
  def glyph_width(code_point)
38
38
  return 0 if code_point.nil? || code_point < 0
39
39
 
40
- m = @metrics.char_metrics_by_code[code_point]
41
- if m.nil?
42
- names = @font.encoding.int_to_name(code_point)
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
43
44
 
44
- m = names.map { |name|
45
- @metrics.char_metrics[name.to_s]
46
- }.compact.first
47
- end
48
-
49
- if m
50
- m[:wx]
51
- elsif @font.widths[code_point - 1]
52
- @font.widths[code_point - 1]
53
- elsif control_character?(code_point)
54
- 0
45
+ if metrics
46
+ metrics[:wx]
55
47
  else
56
- 0
48
+ @font.widths[code_point - 1] || 0
57
49
  end
58
50
  end
59
51
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-24 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -277,10 +277,10 @@ licenses:
277
277
  - MIT
278
278
  metadata:
279
279
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.1/CHANGELOG
281
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.1
282
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.1
283
- post_install_message:
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
284
284
  rdoc_options:
285
285
  - "--title"
286
286
  - PDF::Reader Documentation
@@ -293,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
293
293
  requirements:
294
294
  - - ">="
295
295
  - !ruby/object:Gem::Version
296
- version: 1.9.3
296
+ version: '2.0'
297
297
  required_rubygems_version: !ruby/object:Gem::Requirement
298
298
  requirements:
299
299
  - - ">="
300
300
  - !ruby/object:Gem::Version
301
301
  version: '0'
302
302
  requirements: []
303
- rubygems_version: 3.0.3
304
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
305
305
  specification_version: 4
306
306
  summary: A library for accessing the content of PDF files
307
307
  test_files: []