pdf-reader 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -131,7 +131,9 @@ class PDF::Reader
131
131
  if obj[:ToUnicode]
132
132
  # ToUnicode is optional for Type1 and Type3
133
133
  stream = @ohash.object(obj[:ToUnicode])
134
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
134
+ if stream.is_a?(PDF::Reader::Stream)
135
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
+ end
135
137
  end
136
138
  end
137
139
 
@@ -331,7 +331,9 @@ class PDF::Reader
331
331
  def decrypt(ref, obj)
332
332
  case obj
333
333
  when PDF::Reader::Stream then
334
- obj.data = sec_handler.decrypt(obj.data, ref)
334
+ # PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
335
+ # Therefore we shouldn't try to decrypt it.
336
+ obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
335
337
  obj
336
338
  when Hash then
337
339
  arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
@@ -22,8 +22,8 @@ class PDF::Reader
22
22
  def detect_orientation
23
23
  llx,lly,urx,ury = @attributes[:MediaBox]
24
24
  rotation = @attributes[:Rotate].to_i
25
- width = urx.to_i - llx.to_i
26
- height = ury.to_i - lly.to_i
25
+ width = (urx.to_i - llx.to_i).abs
26
+ height = (ury.to_i - lly.to_i).abs
27
27
  if width > height
28
28
  (rotation % 180).zero? ? 'landscape' : 'portrait'
29
29
  else
@@ -124,6 +124,34 @@ module PDF
124
124
  }.join(" ")
125
125
  end
126
126
 
127
# Returns the angle to rotate the page clockwise. Always 0, 90, 180 or 270.
#
# The raw :Rotate attribute is coerced with to_i (so a missing entry counts
# as 0) and normalised into the range 0...360, because the PDF spec only
# requires the value to be a multiple of 90: -90 is reported as 270, 360 as
# 0 and 450 as 90. Values that are not a multiple of 90 are invalid and
# yield 0.
#
def rotate
  # Ruby's % returns a non-negative result for a positive modulus, so
  # negative angles wrap around to their clockwise equivalent.
  angle = attributes[:Rotate].to_i % 360
  (angle % 90).zero? ? angle : 0
end
138
+
139
# Returns the "boxes" that define the page object, as a Hash keyed by
# :MediaBox, :CropBox, :BleedBox, :TrimBox and :ArtBox.
#
# Values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7:
# a missing CropBox falls back to the MediaBox, and a missing BleedBox,
# TrimBox or ArtBox falls back to the (possibly defaulted) CropBox.
# Every value is passed through objects.deref! before being returned.
#
def boxes
  mediabox = attributes[:MediaBox]
  # The PDF dictionary key is /CropBox (capital "B"). Looking it up under
  # any other spelling never matches and silently discards the crop box.
  cropbox = attributes[:CropBox] || mediabox

  {
    MediaBox: objects.deref!(mediabox),
    CropBox: objects.deref!(cropbox),
    BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
    TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
    ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
  }
end
154
+
127
155
  private
128
156
 
129
157
  def root
@@ -21,18 +21,21 @@ class PDF::Reader
21
21
  @mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
22
22
  @mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
23
23
  @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
24
- @page_width = mediabox[2] - mediabox[0]
25
- @page_height = mediabox[3] - mediabox[1]
26
- @x_offset = @runs.map(&:x).sort.first
24
+ @page_width = (mediabox[2] - mediabox[0]).abs
25
+ @page_height = (mediabox[3] - mediabox[1]).abs
26
+ @x_offset = @runs.map(&:x).sort.first || 0
27
+ lowest_y = @runs.map(&:y).sort.first || 0
28
+ @y_offset = lowest_y > 0 ? 0 : lowest_y
27
29
  end
28
30
 
29
31
  def to_s
30
32
  return "" if @runs.empty?
33
+ return "" if row_count == 0
31
34
 
32
35
  page = row_count.times.map { |i| " " * col_count }
33
36
  @runs.each do |run|
34
37
  x_pos = ((run.x - @x_offset) / col_multiplier).round
35
- y_pos = row_count - (run.y / row_multiplier).round
38
+ y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
36
39
  if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
37
40
  local_string_insert(page[y_pos-1], run.text, x_pos)
38
41
  end
@@ -30,7 +30,7 @@ class PDF::Reader
30
30
  @xobject_stack = [page.xobjects]
31
31
  @cs_stack = [page.color_spaces]
32
32
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- state[:ctm] = identity_matrix
33
+ state[:ctm] = identity_matrix
34
34
  end
35
35
 
36
36
  #####################################################
@@ -322,11 +322,13 @@ class PDF::Reader
322
322
  th = state[:h_scaling]
323
323
  # optimise the common path to reduce Float allocations
324
324
  if th == 1 && tj == 0 && tc == 0 && tw == 0
325
- glyph_width = w0 * fs
326
- tx = glyph_width
325
+ tx = w0 * fs
326
+ elsif tj != 0
327
+ # don't apply spacing to TJ displacement
328
+ tx = (w0 - (tj/1000.0)) * fs * th
327
329
  else
328
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
329
- tx = glyph_width + ((tc + tw) * th)
330
+ # apply horizontal scaling to spacing values but not font size
331
+ tx = ((w0 * fs) + tc + tw) * th
330
332
  end
331
333
 
332
334
  # TODO: I'm pretty sure that tx shouldn't need to be divided by
@@ -41,13 +41,17 @@ module PDF
41
41
  # starting a new page
42
42
  def page=(page)
43
43
  @state = PageState.new(page)
44
+ @page = page
44
45
  @content = []
45
46
  @characters = []
46
47
  @mediabox = page.objects.deref(page.attributes[:MediaBox])
48
+ device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
49
+ device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
50
+ @device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
47
51
  end
48
52
 
49
53
  def content
50
- PageLayout.new(@characters, @mediabox).to_s
54
+ PageLayout.new(@characters, @device_mediabox).to_s
51
55
  end
52
56
 
53
57
  #####################################################
@@ -101,6 +105,8 @@ module PDF
101
105
  glyphs.each_with_index do |glyph_code, index|
102
106
  # paint the current glyph
103
107
  newx, newy = @state.trm_transform(0,0)
108
+ newx, newy = apply_rotation(newx, newy)
109
+
104
110
  utf8_chars = @state.current_font.to_utf8(glyph_code)
105
111
 
106
112
  # apply to glyph displacement for the current glyph so the next
@@ -115,6 +121,21 @@ module PDF
115
121
  end
116
122
  end
117
123
 
124
# Rotates the point (x, y) about the origin by the page's clockwise
# rotation, as reported by @page.rotate, and returns the result as a
# two-element [x, y] Array.
#
#   90  => [ y, -x]
#   180 => [-x, -y]
#   270 => [-y,  x]
#
# Any other rotation value leaves the point unchanged. Each case is a
# true rotation: negating only one axis (or swapping the axes and negating
# both) would mirror the point instead of rotating it.
#
def apply_rotation(x, y)
  case @page.rotate
  when 90  then [y, -x]
  when 180 then [-x, -y]
  when 270 then [-y, x]
  else          [x, y]
  end
end
138
+
118
139
  end
119
140
  end
120
141
  end
@@ -37,23 +37,15 @@ class PDF::Reader
37
37
  def glyph_width(code_point)
38
38
  return 0 if code_point.nil? || code_point < 0
39
39
 
40
- m = @metrics.char_metrics_by_code[code_point]
41
- if m.nil?
42
- names = @font.encoding.int_to_name(code_point)
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
43
44
 
44
- m = names.map { |name|
45
- @metrics.char_metrics[name.to_s]
46
- }.compact.first
47
- end
48
-
49
- if m
50
- m[:wx]
51
- elsif @font.widths[code_point - 1]
52
- @font.widths[code_point - 1]
53
- elsif control_character?(code_point)
54
- 0
45
+ if metrics
46
+ metrics[:wx]
55
47
  else
56
- 0
48
+ @font.widths[code_point - 1] || 0
57
49
  end
58
50
  end
59
51
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-21 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -208,6 +208,7 @@ files:
208
208
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
209
209
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
210
210
  - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
211
212
  - lib/pdf/reader/afm/Symbol.afm
212
213
  - lib/pdf/reader/afm/Times-Bold.afm
213
214
  - lib/pdf/reader/afm/Times-BoldItalic.afm
@@ -276,10 +277,10 @@ licenses:
276
277
  - MIT
277
278
  metadata:
278
279
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
279
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.4.0/CHANGELOG
280
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.4.0
281
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.4.0
282
- post_install_message:
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
283
284
  rdoc_options:
284
285
  - "--title"
285
286
  - PDF::Reader Documentation
@@ -292,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
292
293
  requirements:
293
294
  - - ">="
294
295
  - !ruby/object:Gem::Version
295
- version: 1.9.3
296
+ version: '2.0'
296
297
  required_rubygems_version: !ruby/object:Gem::Requirement
297
298
  requirements:
298
299
  - - ">="
299
300
  - !ruby/object:Gem::Version
300
301
  version: '0'
301
302
  requirements: []
302
- rubygems_version: 3.0.3
303
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
304
305
  specification_version: 4
305
306
  summary: A library for accessing the content of PDF files
306
307
  test_files: []