pdf-reader 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -131,7 +131,9 @@ class PDF::Reader
131
131
  if obj[:ToUnicode]
132
132
  # ToUnicode is optional for Type1 and Type3
133
133
  stream = @ohash.object(obj[:ToUnicode])
134
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
134
+ if stream.is_a?(PDF::Reader::Stream)
135
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
+ end
135
137
  end
136
138
  end
137
139
 
@@ -331,7 +331,9 @@ class PDF::Reader
331
331
  def decrypt(ref, obj)
332
332
  case obj
333
333
  when PDF::Reader::Stream then
334
- obj.data = sec_handler.decrypt(obj.data, ref)
334
+ # PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
335
+ # Therefore we shouldn't try to decrypt it.
336
+ obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
335
337
  obj
336
338
  when Hash then
337
339
  arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
@@ -22,8 +22,8 @@ class PDF::Reader
22
22
  def detect_orientation
23
23
  llx,lly,urx,ury = @attributes[:MediaBox]
24
24
  rotation = @attributes[:Rotate].to_i
25
- width = urx.to_i - llx.to_i
26
- height = ury.to_i - lly.to_i
25
+ width = (urx.to_i - llx.to_i).abs
26
+ height = (ury.to_i - lly.to_i).abs
27
27
  if width > height
28
28
  (rotation % 180).zero? ? 'landscape' : 'portrait'
29
29
  else
@@ -124,6 +124,34 @@ module PDF
124
124
  }.join(" ")
125
125
  end
126
126
 
127
+ # returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270
128
+ #
129
+ def rotate
130
+ value = attributes[:Rotate].to_i
131
+ case value
132
+ when 0, 90, 180, 270
133
+ value
134
+ else
135
+ 0
136
+ end
137
+ end
138
+
139
+ # returns the "boxes" that define the page object.
140
+ # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
141
+ #
142
+ def boxes
143
+ mediabox = attributes[:MediaBox]
144
+ cropbox = attributes[:Cropbox] || mediabox
145
+
146
+ {
147
+ MediaBox: objects.deref!(mediabox),
148
+ CropBox: objects.deref!(cropbox),
149
+ BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
150
+ TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
151
+ ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
152
+ }
153
+ end
154
+
127
155
  private
128
156
 
129
157
  def root
@@ -21,18 +21,21 @@ class PDF::Reader
21
21
  @mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
22
22
  @mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
23
23
  @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
24
- @page_width = mediabox[2] - mediabox[0]
25
- @page_height = mediabox[3] - mediabox[1]
26
- @x_offset = @runs.map(&:x).sort.first
24
+ @page_width = (mediabox[2] - mediabox[0]).abs
25
+ @page_height = (mediabox[3] - mediabox[1]).abs
26
+ @x_offset = @runs.map(&:x).sort.first || 0
27
+ lowest_y = @runs.map(&:y).sort.first || 0
28
+ @y_offset = lowest_y > 0 ? 0 : lowest_y
27
29
  end
28
30
 
29
31
  def to_s
30
32
  return "" if @runs.empty?
33
+ return "" if row_count == 0
31
34
 
32
35
  page = row_count.times.map { |i| " " * col_count }
33
36
  @runs.each do |run|
34
37
  x_pos = ((run.x - @x_offset) / col_multiplier).round
35
- y_pos = row_count - (run.y / row_multiplier).round
38
+ y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
36
39
  if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
37
40
  local_string_insert(page[y_pos-1], run.text, x_pos)
38
41
  end
@@ -30,7 +30,7 @@ class PDF::Reader
30
30
  @xobject_stack = [page.xobjects]
31
31
  @cs_stack = [page.color_spaces]
32
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- state[:ctm] = identity_matrix
33
+ state[:ctm] = identity_matrix
34
34
  end
35
35
 
36
36
  #####################################################
@@ -322,11 +322,13 @@ class PDF::Reader
322
322
  th = state[:h_scaling]
323
323
  # optimise the common path to reduce Float allocations
324
324
  if th == 1 && tj == 0 && tc == 0 && tw == 0
325
- glyph_width = w0 * fs
326
- tx = glyph_width
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
327
329
  else
328
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
329
- tx = glyph_width + ((tc + tw) * th)
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
330
332
  end
331
333
 
332
334
  # TODO: I'm pretty sure that tx shouldn't need to be divided by
@@ -41,13 +41,17 @@ module PDF
41
41
  # starting a new page
42
42
  def page=(page)
43
43
  @state = PageState.new(page)
44
+ @page = page
44
45
  @content = []
45
46
  @characters = []
46
47
  @mediabox = page.objects.deref(page.attributes[:MediaBox])
48
+ device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
49
+ device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
50
+ @device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
47
51
  end
48
52
 
49
53
  def content
50
- PageLayout.new(@characters, @mediabox).to_s
54
+ PageLayout.new(@characters, @device_mediabox).to_s
51
55
  end
52
56
 
53
57
  #####################################################
@@ -101,6 +105,8 @@ module PDF
101
105
  glyphs.each_with_index do |glyph_code, index|
102
106
  # paint the current glyph
103
107
  newx, newy = @state.trm_transform(0,0)
108
+ newx, newy = apply_rotation(newx, newy)
109
+
104
110
  utf8_chars = @state.current_font.to_utf8(glyph_code)
105
111
 
106
112
  # apply to glyph displacment for the current glyph so the next
@@ -115,6 +121,21 @@ module PDF
115
121
  end
116
122
  end
117
123
 
124
+ def apply_rotation(x, y)
125
+ if @page.rotate == 90
126
+ tmp = x
127
+ x = y
128
+ y = tmp * -1
129
+ elsif @page.rotate == 180
130
+ y *= -1
131
+ elsif @page.rotate == 270
132
+ tmp = x
133
+ x = y * -1
134
+ y = tmp * -1
135
+ end
136
+ return x, y
137
+ end
138
+
118
139
  end
119
140
  end
120
141
  end
@@ -37,23 +37,15 @@ class PDF::Reader
37
37
  def glyph_width(code_point)
38
38
  return 0 if code_point.nil? || code_point < 0
39
39
 
40
- m = @metrics.char_metrics_by_code[code_point]
41
- if m.nil?
42
- names = @font.encoding.int_to_name(code_point)
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
43
44
 
44
- m = names.map { |name|
45
- @metrics.char_metrics[name.to_s]
46
- }.compact.first
47
- end
48
-
49
- if m
50
- m[:wx]
51
- elsif @font.widths[code_point - 1]
52
- @font.widths[code_point - 1]
53
- elsif control_character?(code_point)
54
- 0
45
+ if metrics
46
+ metrics[:wx]
55
47
  else
56
- 0
48
+ @font.widths[code_point - 1] || 0
57
49
  end
58
50
  end
59
51
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-21 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -208,6 +208,7 @@ files:
208
208
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
209
209
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
210
210
  - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
211
212
  - lib/pdf/reader/afm/Symbol.afm
212
213
  - lib/pdf/reader/afm/Times-Bold.afm
213
214
  - lib/pdf/reader/afm/Times-BoldItalic.afm
@@ -276,10 +277,10 @@ licenses:
276
277
  - MIT
277
278
  metadata:
278
279
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
279
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.0/CHANGELOG
280
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.0
281
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.0
282
- post_install_message:
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
283
284
  rdoc_options:
284
285
  - "--title"
285
286
  - PDF::Reader Documentation
@@ -292,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
292
293
  requirements:
293
294
  - - ">="
294
295
  - !ruby/object:Gem::Version
295
- version: 1.9.3
296
+ version: '2.0'
296
297
  required_rubygems_version: !ruby/object:Gem::Requirement
297
298
  requirements:
298
299
  - - ">="
299
300
  - !ruby/object:Gem::Version
300
301
  version: '0'
301
302
  requirements: []
302
- rubygems_version: 3.0.3
303
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
304
305
  specification_version: 4
305
306
  summary: A library for accessing the content of PDF files
306
307
  test_files: []