pdf-reader 2.4.2 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6772a9ffb17b04eaa3c77f2a7e9838a432ecd78506a75cb263f0056d1ace4ec8
4
- data.tar.gz: 115a94c5b0c8a5367933c438511ec05e2110aaca5589c35a00e6ca76886fa85c
3
+ metadata.gz: 652d05cf6a22fad5ecb4b92de1e27ba60cafc6525c5ca524e24c7f9796fe1b83
4
+ data.tar.gz: 2c7448e97890a9fcbd10ec2cd5bafb9025db2fb75dabaf71a4074c542b1065a1
5
5
  SHA512:
6
- metadata.gz: c0bf877470670dd5c0d0cc061fcf1d9ad1f3ec687e66981806c023b9f270719c9045eeebf8c9ffecf07e45102a383438b1f52edb9bb215fea45d51e29aec8f62
7
- data.tar.gz: bcf0f0bd8baab728d87e5031fd27eaf5386abfbd6111f06e1b7da3232400aed8f93a65675197451329aab6c503f19a5f378af80bc0659018f7dc246da3cb42ea
6
+ metadata.gz: ac82452924cf46af98ee15f2a20642b1d06d5b9c22104fe171b5b4612665e482f341e12473805016ccb9d921fc15324ba51675170b369adeace8b278cd1279fb
7
+ data.tar.gz: b1dc1c4422b0e6bf01092cf724630ba7424fdef1fdaf34f33aaa3a31397caf6ef5a73185a98e6e2828a9e082d87cbca311565397cb064cac20d86e72be27626f
data/CHANGELOG CHANGED
@@ -1,3 +1,10 @@
1
+ v2.5.0 (6th June 2021)
2
+ - bump minimum ruby version to 2.0
3
+ - Correctly handle transcoding to UTF-8 from some fonts that use a difference table [#344](https://github.com/yob/pdf-reader/pull/344/)
4
+ - Fix some character spacing issues with the TJ operator [#343](https://github.com/yob/pdf-reader/pull/343)
5
+ - Fix crash with some encrypted PDFs [#348](https://github.com/yob/pdf-reader/pull/348/)
6
+ - Fix positions of text on some PDFs with pages rotated 90° [#350](https://github.com/yob/pdf-reader/pull/350/)
7
+
1
8
  v2.4.2 (28th January 2021)
2
9
  - relax ASCII85 dependency to allow 1.x
3
10
  - improved support for decompressing objects with slightly malformed zlib data
@@ -32,7 +32,7 @@ class PDF::Reader
32
32
  def zlib_inflate(data)
33
33
  begin
34
34
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
- rescue Zlib::DataError => e
35
+ rescue Zlib::DataError
36
36
  # by default, Ruby's Zlib assumes the data it's inflating
37
37
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
38
38
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -41,7 +41,7 @@ class PDF::Reader
41
41
 
42
42
  begin
43
43
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
- rescue StandardError => e
44
+ rescue StandardError
45
45
  # swallow this one too, so we can try some other fallback options
46
46
  end
47
47
 
@@ -331,7 +331,9 @@ class PDF::Reader
331
331
  def decrypt(ref, obj)
332
332
  case obj
333
333
  when PDF::Reader::Stream then
334
- obj.data = sec_handler.decrypt(obj.data, ref)
334
+ # PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
335
+ # Therefore we shouldn't try to decrypt it.
336
+ obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
335
337
  obj
336
338
  when Hash then
337
339
  arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
@@ -30,6 +30,7 @@ class PDF::Reader
30
30
 
31
31
  def to_s
32
32
  return "" if @runs.empty?
33
+ return "" if row_count == 0
33
34
 
34
35
  page = row_count.times.map { |i| " " * col_count }
35
36
  @runs.each do |run|
@@ -30,15 +30,7 @@ class PDF::Reader
30
30
  @xobject_stack = [page.xobjects]
31
31
  @cs_stack = [page.color_spaces]
32
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- if page.rotate == 0
34
- state[:ctm] = identity_matrix
35
- else
36
- rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
37
- rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
38
- state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
39
- rotate_sin * -1, rotate_cos,
40
- 0, 0)
41
- end
33
+ state[:ctm] = identity_matrix
42
34
  end
43
35
 
44
36
  #####################################################
@@ -330,11 +322,13 @@ class PDF::Reader
330
322
  th = state[:h_scaling]
331
323
  # optimise the common path to reduce Float allocations
332
324
  if th == 1 && tj == 0 && tc == 0 && tw == 0
333
- glyph_width = w0 * fs
334
- tx = glyph_width
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
335
329
  else
336
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
337
- tx = glyph_width + ((tc + tw) * th)
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
338
332
  end
339
333
 
340
334
  # TODO: I'm pretty sure that tx shouldn't need to be divided by
@@ -41,6 +41,7 @@ module PDF
41
41
  # starting a new page
42
42
  def page=(page)
43
43
  @state = PageState.new(page)
44
+ @page = page
44
45
  @content = []
45
46
  @characters = []
46
47
  @mediabox = page.objects.deref(page.attributes[:MediaBox])
@@ -104,6 +105,8 @@ module PDF
104
105
  glyphs.each_with_index do |glyph_code, index|
105
106
  # paint the current glyph
106
107
  newx, newy = @state.trm_transform(0,0)
108
+ newx, newy = apply_rotation(newx, newy)
109
+
107
110
  utf8_chars = @state.current_font.to_utf8(glyph_code)
108
111
 
109
112
  # apply to glyph displacement for the current glyph so the next
@@ -118,6 +121,21 @@ module PDF
118
121
  end
119
122
  end
120
123
 
124
+ def apply_rotation(x, y)
125
+ if @page.rotate == 90
126
+ tmp = x
127
+ x = y
128
+ y = tmp * -1
129
+ elsif @page.rotate == 180
130
+ y *= -1
131
+ elsif @page.rotate == 270
132
+ tmp = x
133
+ x = y * -1
134
+ y = tmp * -1
135
+ end
136
+ return x, y
137
+ end
138
+
121
139
  end
122
140
  end
123
141
  end
@@ -37,23 +37,15 @@ class PDF::Reader
37
37
  def glyph_width(code_point)
38
38
  return 0 if code_point.nil? || code_point < 0
39
39
 
40
- m = @metrics.char_metrics_by_code[code_point]
41
- if m.nil?
42
- names = @font.encoding.int_to_name(code_point)
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
43
44
 
44
- m = names.map { |name|
45
- @metrics.char_metrics[name.to_s]
46
- }.compact.first
47
- end
48
-
49
- if m
50
- m[:wx]
51
- elsif @font.widths[code_point - 1]
52
- @font.widths[code_point - 1]
53
- elsif control_character?(code_point)
54
- 0
45
+ if metrics
46
+ metrics[:wx]
55
47
  else
56
- 0
48
+ @font.widths[code_point - 1] || 0
57
49
  end
58
50
  end
59
51
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.2
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-28 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -277,9 +277,9 @@ licenses:
277
277
  - MIT
278
278
  metadata:
279
279
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.2/CHANGELOG
281
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.2
282
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.2
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
283
  post_install_message:
284
284
  rdoc_options:
285
285
  - "--title"
@@ -293,7 +293,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
293
293
  requirements:
294
294
  - - ">="
295
295
  - !ruby/object:Gem::Version
296
- version: 1.9.3
296
+ version: '2.0'
297
297
  required_rubygems_version: !ruby/object:Gem::Requirement
298
298
  requirements:
299
299
  - - ">="