pdf-reader 2.4.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2b38615953615bfbca1a80ab344f26166377d8c08d2ba2e05badf43c10682415
4
- data.tar.gz: 658b5d05a14300ad056ee31c10ea998533ccb1b91600e8bc9097070605d003ea
3
+ metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
4
+ data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
5
5
  SHA512:
6
- metadata.gz: 210b0bee8c4ac009808555c8ba945f3b17b85af22126ac1440eb9b49d91f542f1974b0984efb22726985f2cf8e03440511ebc4664ac5c4d91a6bddea9a43687e
7
- data.tar.gz: 8fb60cb59dc4430179a4b9ba83d30ae6dc23aa13dbef5e8febe1569311ddf7e531783da7e7dd0a6542f0087748e97898af56d540e7c088832f213b48059aa7d3
6
+ metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
7
+ data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ v2.5.0 (6th June 2021)
2
+ - bump minimum ruby version to 2.0
3
+ - Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
4
+ - Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
5
+ - Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
6
+ - Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
7
+
8
+ v2.4.2 (28th January 2021)
9
+ - relax ASCII85 dependency to allow 1.x
10
+ - improved support for decompressing objects with slightly malformed zlib data
11
+
1
12
  v.2.4.1 (24th September 2020)
2
13
  - Re-vendor font metrics from Adobe to clarify their license
3
14
 
@@ -9,6 +9,7 @@ class PDF::Reader
9
9
  # implementation of the Flate (zlib) stream filter
10
10
  class Flate
11
11
  ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
+ ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
12
13
 
13
14
  def initialize(options = {})
14
15
  @options = options
@@ -17,24 +18,34 @@ class PDF::Reader
17
18
  ################################################################################
18
19
  # Decode the specified data with the Zlib compression algorithm
19
20
  def filter(data)
20
- deflated = nil
21
+ deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
22
+
23
+ if deflated.nil?
24
+ raise MalformedPDFError,
25
+ "Error while inflating a compressed stream (no suitable inflation algorithm found)"
26
+ end
27
+ Depredict.new(@options).filter(deflated)
28
+ end
29
+
30
+ private
31
+
32
+ def zlib_inflate(data)
21
33
  begin
22
- deflated = Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
23
- rescue Zlib::DataError => e
34
+ return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
+ rescue Zlib::DataError
24
36
  # by default, Ruby's Zlib assumes the data it's inflating
25
37
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
26
- # fails, then use a lightly-documented 'feature' to attempt to inflate
27
- # the data as a raw RFC1951 stream.
28
- #
29
- # See
30
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
31
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
38
+ # fails, swallow the exception and attempt to inflate the data as a raw
39
+ # RFC1951 stream.
32
40
  end
33
- Depredict.new(@options).filter(deflated)
34
- rescue Exception => e
35
- # Oops, there was a problem inflating the stream
36
- raise MalformedPDFError,
37
- "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
41
+
42
+ begin
43
+ return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
+ rescue StandardError
45
+ # swallow this one too, so we can try some other fallback options
46
+ end
47
+
48
+ nil
38
49
  end
39
50
  end
40
51
  end
@@ -331,7 +331,9 @@ class PDF::Reader
331
331
  def decrypt(ref, obj)
332
332
  case obj
333
333
  when PDF::Reader::Stream then
334
- obj.data = sec_handler.decrypt(obj.data, ref)
334
+ # PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
335
+ # Therefore we shouldn't try to decrypt it.
336
+ obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
335
337
  obj
336
338
  when Hash then
337
339
  arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
@@ -30,6 +30,7 @@ class PDF::Reader
30
30
 
31
31
  def to_s
32
32
  return "" if @runs.empty?
33
+ return "" if row_count == 0
33
34
 
34
35
  page = row_count.times.map { |i| " " * col_count }
35
36
  @runs.each do |run|
@@ -30,15 +30,7 @@ class PDF::Reader
30
30
  @xobject_stack = [page.xobjects]
31
31
  @cs_stack = [page.color_spaces]
32
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- if page.rotate == 0
34
- state[:ctm] = identity_matrix
35
- else
36
- rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
37
- rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
38
- state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
39
- rotate_sin * -1, rotate_cos,
40
- 0, 0)
41
- end
33
+ state[:ctm] = identity_matrix
42
34
  end
43
35
 
44
36
  #####################################################
@@ -330,11 +322,13 @@ class PDF::Reader
330
322
  th = state[:h_scaling]
331
323
  # optimise the common path to reduce Float allocations
332
324
  if th == 1 && tj == 0 && tc == 0 && tw == 0
333
- glyph_width = w0 * fs
334
- tx = glyph_width
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
335
329
  else
336
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
337
- tx = glyph_width + ((tc + tw) * th)
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
338
332
  end
339
333
 
340
334
  # TODO: I'm pretty sure that tx shouldn't need to be divided by
@@ -41,6 +41,7 @@ module PDF
41
41
  # starting a new page
42
42
  def page=(page)
43
43
  @state = PageState.new(page)
44
+ @page = page
44
45
  @content = []
45
46
  @characters = []
46
47
  @mediabox = page.objects.deref(page.attributes[:MediaBox])
@@ -104,6 +105,8 @@ module PDF
104
105
  glyphs.each_with_index do |glyph_code, index|
105
106
  # paint the current glyph
106
107
  newx, newy = @state.trm_transform(0,0)
108
+ newx, newy = apply_rotation(newx, newy)
109
+
107
110
  utf8_chars = @state.current_font.to_utf8(glyph_code)
108
111
 
109
112
  # apply to glyph displacement for the current glyph so the next
@@ -118,6 +121,21 @@ module PDF
118
121
  end
119
122
  end
120
123
 
124
+ def apply_rotation(x, y)
125
+ if @page.rotate == 90
126
+ tmp = x
127
+ x = y
128
+ y = tmp * -1
129
+ elsif @page.rotate == 180
130
+ y *= -1
131
+ elsif @page.rotate == 270
132
+ tmp = x
133
+ x = y * -1
134
+ y = tmp * -1
135
+ end
136
+ return x, y
137
+ end
138
+
121
139
  end
122
140
  end
123
141
  end
@@ -37,23 +37,15 @@ class PDF::Reader
37
37
  def glyph_width(code_point)
38
38
  return 0 if code_point.nil? || code_point < 0
39
39
 
40
- m = @metrics.char_metrics_by_code[code_point]
41
- if m.nil?
42
- names = @font.encoding.int_to_name(code_point)
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
43
44
 
44
- m = names.map { |name|
45
- @metrics.char_metrics[name.to_s]
46
- }.compact.first
47
- end
48
-
49
- if m
50
- m[:wx]
51
- elsif @font.widths[code_point - 1]
52
- @font.widths[code_point - 1]
53
- elsif control_character?(code_point)
54
- 0
45
+ if metrics
46
+ metrics[:wx]
55
47
  else
56
- 0
48
+ @font.widths[code_point - 1] || 0
57
49
  end
58
50
  end
59
51
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-24 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -277,10 +277,10 @@ licenses:
277
277
  - MIT
278
278
  metadata:
279
279
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.1/CHANGELOG
281
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.1
282
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.1
283
- post_install_message:
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
284
284
  rdoc_options:
285
285
  - "--title"
286
286
  - PDF::Reader Documentation
@@ -293,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
293
293
  requirements:
294
294
  - - ">="
295
295
  - !ruby/object:Gem::Version
296
- version: 1.9.3
296
+ version: '2.0'
297
297
  required_rubygems_version: !ruby/object:Gem::Requirement
298
298
  requirements:
299
299
  - - ">="
300
300
  - !ruby/object:Gem::Version
301
301
  version: '0'
302
302
  requirements: []
303
- rubygems_version: 3.0.3
304
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
305
305
  specification_version: 4
306
306
  summary: A library for accessing the content of PDF files
307
307
  test_files: []