pdf-reader 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v1.1.0 (25th March 2012)
2
+ - new PageState class for handling common state tracking in page receivers
3
+ - see PageTextReceiver for example usage
4
+ - various bugfixes to support reading more PDF dialects
5
+
1
6
  v1.0.0 (16th January 2012)
2
7
  - support a new encryption variation
3
8
  - bugfix in PageTextReceiver (thanks Paul Gallagher)
@@ -354,6 +354,7 @@ require 'pdf/reader/register_receiver'
354
354
  require 'pdf/reader/standard_security_handler'
355
355
  require 'pdf/reader/stream'
356
356
  require 'pdf/reader/text_receiver'
357
+ require 'pdf/reader/page_state'
357
358
  require 'pdf/reader/page_text_receiver'
358
359
  require 'pdf/reader/token'
359
360
  require 'pdf/reader/xref'
@@ -171,7 +171,11 @@ class PDF::Reader
171
171
  when "<" then :hex_string
172
172
  when "stream" then :stream
173
173
  when "ID"
174
- in_content_stream? ? :inline : :regular
174
+ if in_content_stream? && @tokens[-2] != "/"
175
+ :inline
176
+ else
177
+ :regular
178
+ end
175
179
  else
176
180
  :regular
177
181
  end
@@ -314,7 +318,7 @@ class PDF::Reader
314
318
  @tokens << chr
315
319
  tok = ""
316
320
  break
317
- when "\x28", "\x5B", "\x7B", "\x2F"
321
+ when "\x28", "\x5B", "\x7B"
318
322
  # opening delimiter, start of new token
319
323
  @tokens << tok if tok.size > 0
320
324
  @tokens << chr
@@ -326,6 +330,14 @@ class PDF::Reader
326
330
  @tokens << chr
327
331
  tok = ""
328
332
  break
333
+ when "\x2F"
334
+ # PDF name, start of new token
335
+ @tokens << tok if tok.size > 0
336
+ @tokens << chr
337
+ next_char = peek_char
338
+ @tokens << "" if chr == "/" && [nil, " ", "\n"].include?(next_char)
339
+ tok = ""
340
+ break
329
341
  else
330
342
  tok << chr
331
343
  end
@@ -41,16 +41,12 @@ class PDF::Reader
41
41
  enc = nil
42
42
  end
43
43
 
44
- @to_unicode_required = unicode_required?(enc)
44
+ @enc_name = enc
45
45
  @unpack = get_unpack(enc)
46
46
  @map_file = get_mapping_file(enc)
47
47
  load_mapping(@map_file) if @map_file
48
48
  end
49
49
 
50
- def to_unicode_required?
51
- @to_unicode_required
52
- end
53
-
54
50
  # set the differences table for this encoding. should be an array in the following format:
55
51
  #
56
52
  # [25, :A, 26, :B]
@@ -91,13 +87,40 @@ class PDF::Reader
91
87
  # * pack the final array of Unicode codepoints into a utf-8 string
92
88
  # * mark the string as utf-8 if we're running on a M17N aware VM
93
89
  #
94
- def to_utf8(str, tounicode = nil)
90
# Convert a string in this encoding into UTF-8.
#
# When utf8_conversion_impossible? reports that no conversion can be done,
# the result is one placeholder glyph per unpacked character code (see
# little_boxes); otherwise the string goes through convert_to_utf8.
def to_utf8(str)
  return convert_to_utf8(str) unless utf8_conversion_impossible?

  glyph_count = str.unpack(unpack).size
  little_boxes(glyph_count)
end
97
+
98
+ private
99
+
100
# True when the encoding name recorded in @enc_name is one of the two
# Identity encodings (Identity-H or Identity-V).
def utf8_conversion_impossible?
  [:"Identity-H", :"Identity-V"].include?(@enc_name)
end
103
+
104
# Build a UTF-8 string made of +times+ copies of the UNKNOWN_CHAR
# placeholder codepoint.
def little_boxes(times)
  boxes = Array.new(times, PDF::Reader::Encoding::UNKNOWN_CHAR).pack("U*")
  # force_encoding is only called when the string responds to it
  boxes.force_encoding("UTF-8") if boxes.respond_to?(:force_encoding)
  boxes
end
110
+
111
+ def convert_to_utf8(str)
95
112
  ret = str.unpack(unpack).map { |c|
96
113
  differences[c] || c
97
- }.map { |num|
98
- original_codepoint_to_unicode(num, tounicode)
114
+ }.map { |c|
115
+ mapping[c] || c
99
116
  }.map { |c|
100
117
  names_to_unicode[c] || c
118
+ }.map { |c|
119
+ if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
120
+ PDF::Reader::Encoding::UNKNOWN_CHAR
121
+ else
122
+ c
123
+ end
101
124
  }.map { |c|
102
125
  if c.nil? || !c.is_a?(Fixnum)
103
126
  PDF::Reader::Encoding::UNKNOWN_CHAR
@@ -111,22 +134,6 @@ class PDF::Reader
111
134
  ret
112
135
  end
113
136
 
114
- private
115
-
116
- def original_codepoint_to_unicode(cp, tounicode = nil)
117
- if tounicode && (code = tounicode.decode(cp))
118
- code
119
- elsif to_unicode_required? && (tounicode.nil? || tounicode.decode(cp).nil?)
120
- PDF::Reader::Encoding::UNKNOWN_CHAR
121
- elsif mapping[cp]
122
- mapping[cp]
123
- elsif PDF::Reader::Encoding::CONTROL_CHARS.include?(cp)
124
- PDF::Reader::Encoding::UNKNOWN_CHAR
125
- else
126
- cp
127
- end
128
- end
129
-
130
137
  def get_unpack(enc)
131
138
  case enc
132
139
  when :"Identity-H", :"Identity-V", :UTF16Encoding
@@ -157,10 +164,6 @@ class PDF::Reader
157
164
  end
158
165
  end
159
166
 
160
- def unicode_required?(enc)
161
- enc == :"Identity-H" or enc == :"Identity-V"
162
- end
163
-
164
167
  def mapping
165
168
  @mapping ||= {}
166
169
  end
@@ -35,6 +35,7 @@ class PDF::Reader
35
35
  return
36
36
  end
37
37
  @ohash = ohash
38
+ @tounicode = nil
38
39
 
39
40
  extract_base_info(obj)
40
41
  extract_descriptor(obj)
@@ -58,24 +59,17 @@ class PDF::Reader
58
59
  end
59
60
 
60
61
  def to_utf8(params)
61
- raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
62
-
63
- if params.class == String
64
- encoding.to_utf8(params, tounicode)
65
- elsif params.class == Array
66
- params.collect { |param| self.to_utf8(param) }
62
+ if @tounicode
63
+ to_utf8_via_cmap(params)
67
64
  else
68
- params
65
+ to_utf8_via_encoding(params)
69
66
  end
70
67
  end
71
68
 
72
69
  def glyph_width(c)
73
70
  @missing_width ||= 0
74
- if @widths.nil?
75
- 0
76
- else
77
- @widths.fetch(c.codepoints.first - @first_char, @missing_width)
78
- end
71
+ @widths ||= []
72
+ @widths.fetch(c - @first_char, @missing_width)
79
73
  end
80
74
 
81
75
  private
@@ -84,7 +78,7 @@ class PDF::Reader
84
78
  @subtype = @ohash.object(obj[:Subtype])
85
79
  @basefont = @ohash.object(obj[:BaseFont])
86
80
  @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
87
- @widths = @ohash.object(obj[:Widths])
81
+ @widths = @ohash.object(obj[:Widths]) || []
88
82
  @first_char = @ohash.object(obj[:FirstChar])
89
83
  if obj[:ToUnicode]
90
84
  stream = @ohash.object(obj[:ToUnicode])
@@ -111,5 +105,29 @@ class PDF::Reader
111
105
  }
112
106
  end
113
107
 
108
# Convert +params+ to UTF-8 using the font's ToUnicode CMap (@tounicode).
#
# * a String is unpacked into character codes with the font encoding's
#   unpack template; each code is looked up in the CMap and codes the CMap
#   cannot decode become PDF::Reader::Encoding::UNKNOWN_CHAR
# * an Array is converted element by element (recursively)
# * anything else is returned untouched
#
# Type dispatch uses case/when (Class#===) so that subclasses of String and
# Array are converted too, instead of being passed through undecoded.
def to_utf8_via_cmap(params)
  case params
  when String
    codepoints = params.unpack(encoding.unpack).map { |c|
      @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
    }
    codepoints.pack("U*")
  when Array
    params.map { |param| to_utf8_via_cmap(param) }
  else
    params
  end
end
119
+
120
# Convert +params+ to UTF-8 using the font's encoding object.
#
# * a String is handed to encoding.to_utf8
# * an Array is converted element by element (recursively)
# * anything else is returned untouched
#
# Raises UnsupportedFeatureError when the font's encoding is a plain String
# rather than an object that can perform the conversion.
#
# Type dispatch uses case/when (Class#===) so that subclasses of String and
# Array are converted too, instead of being passed through undecoded.
def to_utf8_via_encoding(params)
  raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)

  case params
  when String
    encoding.to_utf8(params)
  when Array
    params.map { |param| to_utf8_via_encoding(param) }
  else
    params
  end
end
131
+
114
132
  end
115
133
  end
@@ -13,6 +13,8 @@ module PDF
13
13
  class FormXObject
14
14
  include ResourceMethods
15
15
 
16
+ attr_reader :xobject
17
+
16
18
  def initialize(page, xobject)
17
19
  @page = page
18
20
  @objects = page.objects
@@ -317,7 +317,7 @@ class PDF::Reader
317
317
  if obj[:Type] == :Page
318
318
  ref
319
319
  elsif obj[:Type] == :Pages
320
- obj[:Kids].map { |kid| get_page_objects(kid) }
320
+ deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
321
321
  end
322
322
  end
323
323
 
@@ -0,0 +1,295 @@
1
+ # coding: utf-8
2
+
3
+ require 'matrix'
4
+
5
module PDF
  class Reader
    # Tracks the graphics state, text matrices and resource stacks of a page
    # while its content stream is walked. Page receivers can forward the
    # state-changing operators to an instance of this class and then query
    # it (current_font, ctm_transform, trm_transform, find_*).
    #
    # The page object passed to #initialize must respond to #objects,
    # #fonts, #xobjects and #color_spaces.
    class PageState

      # Initial graphics state for a page. Frozen so the shared default can
      # not be modified by accident; each PageState starts from a dup.
      DEFAULT_GRAPHICS_STATE = {
        :ctm            => Matrix.identity(3),
        :char_spacing   => 0,
        :word_spacing   => 0,
        :h_scaling      => 100,
        :text_leading   => 0,
        :text_font      => nil,
        :text_font_size => nil,
        :text_mode      => 0,
        :text_rise      => 0,
        :text_knockout  => 0
      }.freeze

      # starting a new page
      def initialize(page)
        @page          = page
        @objects       = page.objects
        @font_stack    = [build_fonts(page.fonts)]
        @xobject_stack = [page.xobjects]
        @cs_stack      = [page.color_spaces]
        @stack         = [DEFAULT_GRAPHICS_STATE.dup]

        # Start with identity text matrices so that font_size and the text
        # positioning operators do not fail on nil when they are used before
        # the first begin_text_object.
        reset_text_matrices
      end

      #####################################################
      # Graphics State Operators
      #####################################################

      # push a deep copy of the current graphics state onto the stack
      def save_graphics_state
        @stack.push clone_state
      end

      # drop the current graphics state, returning to the previously saved
      # one.
      #
      # The bottom-most state is never removed: an unbalanced restore would
      # otherwise leave no current state and every later operator would fail
      # on nil.
      def restore_graphics_state
        @stack.pop if @stack.size > 1
      end

      #####################################################
      # Matrix Operators
      #####################################################

      # update the current transformation matrix.
      #
      # If the CTM is currently undefined, just store the new values.
      #
      # If there's an existing CTM, then multiply the existing matrix
      # with the new matrix to form the updated matrix.
      #
      def concatenate_matrix(a, b, c, d, e, f)
        transform = Matrix[
          [a, b, 0],
          [c, d, 0],
          [e, f, 1]
        ]
        if state[:ctm]
          state[:ctm] = transform * state[:ctm]
        else
          state[:ctm] = transform
        end
      end

      #####################################################
      # Text Object Operators
      #####################################################

      # reset the text matrix and text line matrix to the identity matrix
      def begin_text_object
        reset_text_matrices
      end

      # reset the text matrix and text line matrix to the identity matrix
      def end_text_object
        reset_text_matrices
      end

      #####################################################
      # Text State Operators
      #####################################################

      def set_character_spacing(char_spacing)
        state[:char_spacing] = char_spacing
      end

      def set_horizontal_text_scaling(h_scaling)
        state[:h_scaling] = h_scaling
      end

      def set_text_font_and_size(label, size)
        state[:text_font]      = label
        state[:text_font_size] = size
      end

      # the current font size multiplied by the [0,0] element of the text
      # matrix.
      #
      # Fails with NoMethodError while no font size has been set, because
      # the default :text_font_size is nil.
      def font_size
        state[:text_font_size] * @text_matrix[0,0]
      end

      def set_text_leading(leading)
        state[:text_leading] = leading
      end

      def set_text_rendering_mode(mode)
        state[:text_mode] = mode
      end

      def set_text_rise(rise)
        state[:text_rise] = rise
      end

      def set_word_spacing(word_spacing)
        state[:word_spacing] = word_spacing
      end

      #####################################################
      # Text Positioning Operators
      #####################################################

      def move_text_position(x, y) # Td
        temp_matrix = Matrix[
          [1, 0, 0],
          [0, 1, 0],
          [x, y, 1]
        ]
        @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
      end

      def move_text_position_and_set_leading(x, y) # TD
        set_text_leading(-1 * y)
        move_text_position(x, y)
      end

      def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
        @text_matrix = @text_line_matrix = Matrix[
          [a, b, 0],
          [c, d, 0],
          [e, f, 1]
        ]
      end

      def move_to_start_of_next_line # T*
        move_text_position(0, -state[:text_leading])
      end

      #####################################################
      # Text Showing Operators
      #####################################################

      def show_text_with_positioning(params) # TJ
        # TODO record position changes in state here
      end

      # only the move to the next line is tracked here, the string itself
      # is ignored
      def move_to_next_line_and_show_text(str) # '
        move_to_start_of_next_line
      end

      def set_spacing_next_line_show_text(aw, ac, string) # "
        set_word_spacing(aw)
        set_character_spacing(ac)
        move_to_next_line_and_show_text(string)
      end

      #####################################################
      # XObjects
      #####################################################

      # Look up the XObject named +label+ and yield it to the block inside a
      # saved graphics state, with the XObject's :Matrix (if any) applied.
      #
      # For a Form XObject a PDF::Reader::FormXObject wrapper is yielded and
      # the form's fonts and xobjects are visible to find_font/find_xobject
      # for the duration of the block. Any other XObject is yielded as is.
      #
      # Raises MalformedPDFError if no XObject with that label is known. The
      # lookup happens before any state is pushed, and the pushed state and
      # resource stacks are unwound in ensure blocks, so a missing XObject or
      # an exception raised by the block does not leave stale entries behind.
      def invoke_xobject(label)
        xobject = find_xobject(label)
        raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

        save_graphics_state
        begin
          matrix = xobject.hash[:Matrix]
          concatenate_matrix(*matrix) if matrix

          if xobject.hash[:Subtype] == :Form
            yield_form_xobject(xobject) { |form| yield form if block_given? }
          else
            yield xobject if block_given?
          end
        ensure
          restore_graphics_state
        end
      end

      #####################################################
      # Public Visible State
      #####################################################

      # transform x and y co-ordinates from the current user space to the
      # underlying device space.
      #
      def ctm_transform(x, y, z = 1)
        apply_matrix(ctm, x, y, z)
      end

      # transform x and y co-ordinates from the current text space to the
      # underlying device space.
      #
      def trm_transform(x, y, z = 1)
        apply_matrix(text_rendering_matrix, x, y, z)
      end

      # the font selected by the last set_text_font_and_size, or nil
      def current_font
        find_font(state[:text_font])
      end

      def find_font(label)
        find_in_stack(@font_stack, label)
      end

      def find_color_space(label)
        find_in_stack(@cs_stack, label)
      end

      def find_xobject(label)
        find_in_stack(@xobject_stack, label)
      end

      private

      # set both text matrices to the identity matrix
      def reset_text_matrices
        @text_matrix = @text_line_matrix = Matrix.identity(3)
      end

      # wrap +xobject+ in a FormXObject and yield it while the form's fonts
      # and xobjects sit on top of the resource stacks.
      def yield_form_xobject(xobject)
        form     = PDF::Reader::FormXObject.new(@page, xobject)
        fonts    = form.font_objects
        xobjects = form.xobjects

        @font_stack.unshift(fonts)
        @xobject_stack.unshift(xobjects)
        begin
          yield form
        ensure
          @font_stack.shift
          @xobject_stack.shift
        end
      end

      # return the value stored under +label+ in the first hash of +stack+
      # that has such a key, or nil if none has
      def find_in_stack(stack, label)
        dict = stack.detect { |entries| entries.has_key?(label) }
        dict ? dict[label] : nil
      end

      # multiply the row vector [x, y, z] with +matrix+ and return the first
      # two components of the result
      def apply_matrix(matrix, x, y, z)
        [
          (matrix[0,0] * x) + (matrix[1,0] * y) + (matrix[2,0] * z),
          (matrix[0,1] * x) + (matrix[1,1] * y) + (matrix[2,1] * z)
        ]
      end

      # NOTE(review): :h_scaling is stored exactly as passed to
      # set_horizontal_text_scaling (default 100) and used unscaled here.
      # The PDF specification defines the horizontal scale as Tz / 100, so
      # confirm before relying on the x values this produces.
      def text_rendering_matrix
        state_matrix = Matrix[
          [font_size * state[:h_scaling], 0, 0],
          [0, font_size, 0],
          [0, state[:text_rise], 1]
        ]

        state_matrix * @text_matrix * ctm
      end

      # return the current transformation matrix
      #
      def ctm
        state[:ctm]
      end

      # the graphics state on top of the stack
      def state
        @stack.last
      end

      # wrap the raw PDF Font objects in handy ruby Font objects.
      #
      def build_fonts(raw_fonts)
        wrapped_fonts = raw_fonts.map { |label, font|
          [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
        }

        ::Hash[wrapped_fonts]
      end

      # when save_graphics_state is called, we need to push a new copy of the
      # current state onto the stack. That way any modifications to the state
      # will be undone once restore_graphics_state is called.
      #
      # This returns a deep clone of the current state, ensuring changes are
      # kept separate from earlier states.
      #
      # Marshal is used to round-trip the state through a string to easily
      # perform the deep clone. Kinda hacky, but effective.
      #
      def clone_state
        if @stack.empty?
          {}
        else
          Marshal.load Marshal.dump(@stack.last)
        end
      end

    end
  end
end
295
+
@@ -1,156 +1,43 @@
1
1
  # coding: utf-8
2
2
 
3
3
  require 'matrix'
4
+ require 'forwardable'
4
5
 
5
6
  module PDF
6
7
  class Reader
7
8
  class PageTextReceiver
9
+ extend Forwardable
8
10
 
9
- DEFAULT_GRAPHICS_STATE = {
10
- :ctm => Matrix.identity(3),
11
- :char_spacing => 0,
12
- :word_spacing => 0,
13
- :h_scaling => 100,
14
- :text_leading => 0,
15
- :text_font => nil,
16
- :text_font_size => nil,
17
- :text_mode => 0,
18
- :text_rise => 0,
19
- :text_knockout => 0
20
- }
21
-
22
- # starting a new page
23
- def page=(page)
24
- @page = page
25
- @objects = page.objects
26
- @font_stack = [build_fonts(page.fonts)]
27
- @xobject_stack = [page.xobjects]
28
- @content = {}
29
- @stack = [DEFAULT_GRAPHICS_STATE.dup]
30
- end
31
-
32
- def content
33
- keys = @content.keys.sort.reverse
34
- keys.map { |key|
35
- @content[key]
36
- }.join("\n")
37
- end
38
-
39
- #####################################################
40
11
  # Graphics State Operators
41
- #####################################################
12
+ def_delegators :@state, :save_graphics_state, :restore_graphics_state
42
13
 
43
- def save_graphics_state
44
- @stack.push clone_state
45
- end
46
-
47
- def restore_graphics_state
48
- @stack.pop
49
- end
50
-
51
- #####################################################
52
14
  # Matrix Operators
53
- #####################################################
15
+ def_delegators :@state, :concatenate_matrix
54
16
 
55
- # update the current transformation matrix.
56
- #
57
- # If the CTM is currently undefined, just store the new values.
58
- #
59
- # If there's an existing CTM, then multiply the existing matrix
60
- # with the new matrix to form the updated matrix.
61
- #
62
- def concatenate_matrix(a, b, c, d, e, f)
63
- transform = Matrix[
64
- [a, b, 0],
65
- [c, d, 0],
66
- [e, f, 1]
67
- ]
68
- if state[:ctm]
69
- state[:ctm] = transform * state[:ctm]
70
- else
71
- state[:ctm] = transform
72
- end
73
- end
74
-
75
- #####################################################
76
17
  # Text Object Operators
77
- #####################################################
18
+ def_delegators :@state, :begin_text_object, :end_text_object
78
19
 
79
- def begin_text_object
80
- @text_matrix = Matrix.identity(3)
81
- @text_line_matrix = Matrix.identity(3)
82
- end
83
-
84
- def end_text_object
85
- @text_matrix = Matrix.identity(3)
86
- @text_line_matrix = Matrix.identity(3)
87
- end
88
-
89
- #####################################################
90
20
  # Text State Operators
91
- #####################################################
92
-
93
- def set_character_spacing(char_spacing)
94
- state[:char_spacing] = char_spacing
95
- end
21
+ def_delegators :@state, :set_character_spacing, :set_horizontal_text_scaling
22
+ def_delegators :@state, :set_text_font_and_size, :font_size
23
+ def_delegators :@state, :set_text_leading, :set_text_rendering_mode
24
+ def_delegators :@state, :set_text_rise, :set_word_spacing
96
25
 
97
- def set_horizontal_text_scaling(h_scaling)
98
- state[:h_scaling] = h_scaling
99
- end
100
-
101
- def set_text_font_and_size(label, size)
102
- state[:text_font] = label
103
- state[:text_font_size] = size
104
- end
105
-
106
- def font_size
107
- state[:text_font_size] * @text_matrix[0,0]
108
- end
109
-
110
- def set_text_leading(leading)
111
- state[:text_leading] = leading
112
- end
113
-
114
- def set_text_rendering_mode(mode)
115
- state[:text_mode] = mode
116
- end
117
-
118
- def set_text_rise(rise)
119
- state[:text_rise] = rise
120
- end
121
-
122
- def set_word_spacing(word_spacing)
123
- state[:word_spacing] = word_spacing
124
- end
125
-
126
- #####################################################
127
26
  # Text Positioning Operators
128
- #####################################################
27
+ def_delegators :@state, :move_text_position, :move_text_position_and_set_leading
28
+ def_delegators :@state, :set_text_matrix_and_text_line_matrix, :move_to_start_of_next_line
129
29
 
130
- def move_text_position(x, y) # Td
131
- temp_matrix = Matrix[
132
- [1, 0, 0],
133
- [0, 1, 0],
134
- [x, y, 1]
135
- ]
136
- @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
137
- end
138
-
139
- def move_text_position_and_set_leading(x, y) # TD
140
- set_text_leading(-1 * y)
141
- move_text_position(x, y)
142
- end
143
-
144
- def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
145
- @text_matrix = @text_line_matrix = Matrix[
146
- [a, b, 0],
147
- [c, d, 0],
148
- [e, f, 1]
149
- ]
30
+ # starting a new page
31
+ def page=(page)
32
+ @state = PageState.new(page)
33
+ @content = {}
150
34
  end
151
35
 
152
- def move_to_start_of_next_line # T*
153
- move_text_position(0, -state[:text_leading])
36
+ def content
37
+ keys = @content.keys.sort.reverse
38
+ keys.map { |key|
39
+ @content[key]
40
+ }.join("\n")
154
41
  end
155
42
 
156
43
  #####################################################
@@ -159,10 +46,10 @@ module PDF
159
46
 
160
47
  # record text that is drawn on the page
161
48
  def show_text(string) # Tj
162
- raise PDF::Reader::MalformedPDFError, "current font is invalid" if current_font.nil?
163
- at = transform(Point.new(0,0))
164
- @content[at.y] ||= ""
165
- @content[at.y] << current_font.to_utf8(string)
49
+ raise PDF::Reader::MalformedPDFError, "current font is invalid" if @state.current_font.nil?
50
+ newx, newy = @state.trm_transform(0,0)
51
+ @content[newy] ||= ""
52
+ @content[newy] << @state.current_font.to_utf8(string)
166
53
  end
167
54
 
168
55
  def show_text_with_positioning(params) # TJ
@@ -177,13 +64,13 @@ module PDF
177
64
  end
178
65
 
179
66
  def move_to_next_line_and_show_text(str) # '
180
- move_to_start_of_next_line
67
+ @state.move_to_start_of_next_line
181
68
  show_text(str)
182
69
  end
183
70
 
184
71
  def set_spacing_next_line_show_text(aw, ac, string) # "
185
- set_word_spacing(aw)
186
- set_character_spacing(ac)
72
+ @state.set_word_spacing(aw)
73
+ @state.set_character_spacing(ac)
187
74
  move_to_next_line_and_show_text(string)
188
75
  end
189
76
 
@@ -191,103 +78,14 @@ module PDF
191
78
  # XObjects
192
79
  #####################################################
193
80
  def invoke_xobject(label)
194
- save_graphics_state
195
- dict = @xobject_stack.detect { |xobjects|
196
- xobjects.has_key?(label)
197
- }
198
- xobject = dict ? dict[label] : nil
199
-
200
- raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?
201
- matrix = xobject.hash[:Matrix]
202
- concatenate_matrix(*matrix) if matrix
203
-
204
- if xobject.hash[:Subtype] == :Form
205
- form = PDF::Reader::FormXObject.new(@page, xobject)
206
- @font_stack.unshift(form.font_objects)
207
- @xobject_stack.unshift(form.xobjects)
208
- form.walk(self)
209
- @font_stack.shift
210
- @xobject_stack.shift
211
- end
212
-
213
- restore_graphics_state
214
- end
215
-
216
- private
217
-
218
- # wrap the raw PDF Font objects in handy ruby Font objects.
219
- #
220
- def build_fonts(raw_fonts)
221
- wrapped_fonts = raw_fonts.map { |label, font|
222
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
223
- }
224
-
225
- ::Hash[wrapped_fonts]
226
- end
227
-
228
- # transform x and y co-ordinates from the current text space to the
229
- # underlying device space.
230
- #
231
- def transform(point, z = 1)
232
- point.transform(text_rendering_matrix, z)
233
- end
234
-
235
- def text_rendering_matrix
236
- state_matrix = Matrix[
237
- [font_size * state[:h_scaling], 0, 0],
238
- [0, font_size, 0],
239
- [0, state[:text_rise], 1]
240
- ]
241
-
242
- state_matrix * @text_matrix * ctm
243
- end
244
-
245
- def state
246
- @stack.last
247
- end
248
-
249
- # when save_graphics_state is called, we need to push a new copy of the
250
- # current state onto the stack. That way any modifications to the state
251
- # will be undone once restore_graphics_state is called.
252
- #
253
- # This returns a deep clone of the current state, ensuring changes are
254
- # keep separate from earlier states.
255
- #
256
- # Marshal is used to round-trip the state through a string to easily
257
- # perform the deep clone. Kinda hacky, but effective.
258
- #
259
- def clone_state
260
- if @stack.empty?
261
- {}
262
- else
263
- Marshal.load Marshal.dump(@stack.last)
81
+ @state.invoke_xobject(label) do |xobj|
82
+ case xobj
83
+ when PDF::Reader::FormXObject then
84
+ xobj.walk(self)
85
+ end
264
86
  end
265
87
  end
266
88
 
267
- # return the current transformation matrix
268
- #
269
- def ctm
270
- state[:ctm]
271
- end
272
-
273
- def current_font
274
- dict = @font_stack.detect { |fonts|
275
- fonts.has_key?(state[:text_font])
276
- }
277
- dict ? dict[state[:text_font]] : nil
278
- end
279
-
280
- # private class for representing points on a cartesian plain. Used
281
- # to simplify maths.
282
- #
283
- class Point < Struct.new(:x, :y)
284
- def transform(trm, z)
285
- Point.new(
286
- (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
287
- (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
288
- )
289
- end
290
- end
291
89
  end
292
90
  end
293
91
  end
@@ -130,6 +130,7 @@ class PDF::Reader
130
130
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
131
131
  def pdf_name
132
132
  tok = @buffer.token
133
+ tok = " " if tok == "" && RUBY_VERSION < "1.9"
133
134
  tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
134
135
  match[1, 2].hex.chr
135
136
  end
@@ -8,12 +8,20 @@ module PDF
8
8
  module ResourceMethods
9
9
  # Returns a Hash of color spaces that are available to this page
10
10
  #
11
+ # NOTE: this method de-serialises objects from the underlying PDF
12
+ # with no caching. You will want to cache the results instead
13
+ # of calling it over and over.
14
+ #
11
15
  def color_spaces
12
16
  @objects.deref!(resources[:ColorSpace]) || {}
13
17
  end
14
18
 
15
19
  # Returns a Hash of fonts that are available to this page
16
20
  #
21
+ # NOTE: this method de-serialises objects from the underlying PDF
22
+ # with no caching. You will want to cache the results instead
23
+ # of calling it over and over.
24
+ #
17
25
  def fonts
18
26
  @objects.deref!(resources[:Font]) || {}
19
27
  end
@@ -21,36 +29,60 @@ module PDF
21
29
  # Returns a Hash of external graphic states that are available to this
22
30
  # page
23
31
  #
32
+ # NOTE: this method de-serialises objects from the underlying PDF
33
+ # with no caching. You will want to cache the results instead
34
+ # of calling it over and over.
35
+ #
24
36
  def graphic_states
25
37
  @objects.deref!(resources[:ExtGState]) || {}
26
38
  end
27
39
 
28
40
  # Returns a Hash of patterns that are available to this page
29
41
  #
42
+ # NOTE: this method de-serialises objects from the underlying PDF
43
+ # with no caching. You will want to cache the results instead
44
+ # of calling it over and over.
45
+ #
30
46
  def patterns
31
47
  @objects.deref!(resources[:Pattern]) || {}
32
48
  end
33
49
 
34
50
  # Returns an Array of procedure sets that are available to this page
35
51
  #
52
+ # NOTE: this method de-serialises objects from the underlying PDF
53
+ # with no caching. You will want to cache the results instead
54
+ # of calling it over and over.
55
+ #
36
56
  def procedure_sets
37
57
  @objects.deref!(resources[:ProcSet]) || []
38
58
  end
39
59
 
40
60
  # Returns a Hash of properties sets that are available to this page
41
61
  #
62
+ # NOTE: this method de-serialises objects from the underlying PDF
63
+ # with no caching. You will want to cache the results instead
64
+ # of calling it over and over.
65
+ #
42
66
  def properties
43
67
  @objects.deref!(resources[:Properties]) || {}
44
68
  end
45
69
 
46
70
  # Returns a Hash of shadings that are available to this page
47
71
  #
72
+ # NOTE: this method de-serialises objects from the underlying PDF
73
+ # with no caching. You will want to cache the results instead
74
+ # of calling it over and over.
75
+ #
48
76
  def shadings
49
77
  @objects.deref!(resources[:Shading]) || {}
50
78
  end
51
79
 
52
80
  # Returns a Hash of XObjects that are available to this page
53
81
  #
82
+ # NOTE: this method de-serialises objects from the underlying PDF
83
+ # with no caching. You will want to cache the results instead
84
+ # of calling it over and over.
85
+ #
54
86
  def xobjects
55
87
  @objects.deref!(resources[:XObject]) || {}
56
88
  end
@@ -124,7 +124,7 @@ class PDF::Reader
124
124
  generation = buf.token.to_i
125
125
  state = buf.token
126
126
 
127
- store(objid, generation, offset) if state == "n"
127
+ store(objid, generation, offset) if state == "n" && offset > 0
128
128
  objid += 1
129
129
  params.clear
130
130
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-16 00:00:00.000000000 Z
12
+ date: 2012-03-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &24844240 !ruby/object:Gem::Requirement
16
+ requirement: &20774400 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *24844240
24
+ version_requirements: *20774400
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: roodi
27
- requirement: &24843780 !ruby/object:Gem::Requirement
27
+ requirement: &20772820 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *24843780
35
+ version_requirements: *20772820
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &24843280 !ruby/object:Gem::Requirement
38
+ requirement: &20771660 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '2.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *24843280
46
+ version_requirements: *20771660
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: ZenTest
49
- requirement: &24842780 !ruby/object:Gem::Requirement
49
+ requirement: &20770840 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 4.4.2
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *24842780
57
+ version_requirements: *20770840
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: Ascii85
60
- requirement: &24842320 !ruby/object:Gem::Requirement
60
+ requirement: &20770080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *24842320
68
+ version_requirements: *20770080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby-rc4
71
- requirement: &24841940 !ruby/object:Gem::Requirement
71
+ requirement: &20769400 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *24841940
79
+ version_requirements: *20769400
80
80
  description: The PDF::Reader library implements a PDF parser conforming as much as
81
81
  possible to the PDF specification from Adobe
82
82
  email:
@@ -114,6 +114,7 @@ files:
114
114
  - lib/pdf/reader/filter.rb
115
115
  - lib/pdf/reader/object_hash.rb
116
116
  - lib/pdf/reader/stream.rb
117
+ - lib/pdf/reader/page_state.rb
117
118
  - lib/pdf/reader/standard_security_handler.rb
118
119
  - lib/pdf/reader/cmap.rb
119
120
  - lib/pdf/reader/form_xobject.rb