pdf-reader 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v1.1.0 (25th March 2012)
2
+ - new PageState class for handling common state tracking in page receivers
3
+ - see PageTextReceiver for example usage
4
+ - various bugfixes to support reading more PDF dialects
5
+
1
6
  v1.0.0 (16th January 2012)
2
7
  - support a new encryption variation
3
8
  - bugfix in PageTextRender (thanks Paul Gallagher)
@@ -354,6 +354,7 @@ require 'pdf/reader/register_receiver'
354
354
  require 'pdf/reader/standard_security_handler'
355
355
  require 'pdf/reader/stream'
356
356
  require 'pdf/reader/text_receiver'
357
+ require 'pdf/reader/page_state'
357
358
  require 'pdf/reader/page_text_receiver'
358
359
  require 'pdf/reader/token'
359
360
  require 'pdf/reader/xref'
@@ -171,7 +171,11 @@ class PDF::Reader
171
171
  when "<" then :hex_string
172
172
  when "stream" then :stream
173
173
  when "ID"
174
- in_content_stream? ? :inline : :regular
174
+ if in_content_stream? && @tokens[-2] != "/"
175
+ :inline
176
+ else
177
+ :regular
178
+ end
175
179
  else
176
180
  :regular
177
181
  end
@@ -314,7 +318,7 @@ class PDF::Reader
314
318
  @tokens << chr
315
319
  tok = ""
316
320
  break
317
- when "\x28", "\x5B", "\x7B", "\x2F"
321
+ when "\x28", "\x5B", "\x7B"
318
322
  # opening delimiter, start of new token
319
323
  @tokens << tok if tok.size > 0
320
324
  @tokens << chr
@@ -326,6 +330,14 @@ class PDF::Reader
326
330
  @tokens << chr
327
331
  tok = ""
328
332
  break
333
+ when "\x2F"
334
+ # PDF name, start of new token
335
+ @tokens << tok if tok.size > 0
336
+ @tokens << chr
337
+ next_char = peek_char
338
+ @tokens << "" if chr == "/" && [nil, " ", "\n"].include?(next_char)
339
+ tok = ""
340
+ break
329
341
  else
330
342
  tok << chr
331
343
  end
@@ -41,16 +41,12 @@ class PDF::Reader
41
41
  enc = nil
42
42
  end
43
43
 
44
- @to_unicode_required = unicode_required?(enc)
44
+ @enc_name = enc
45
45
  @unpack = get_unpack(enc)
46
46
  @map_file = get_mapping_file(enc)
47
47
  load_mapping(@map_file) if @map_file
48
48
  end
49
49
 
50
- def to_unicode_required?
51
- @to_unicode_required
52
- end
53
-
54
50
  # set the differences table for this encoding. should be an array in the following format:
55
51
  #
56
52
  # [25, :A, 26, :B]
@@ -91,13 +87,40 @@ class PDF::Reader
91
87
  # * pack the final array of Unicode codepoints into a utf-8 string
92
88
  # * mark the string as utf-8 if we're running on a M17N aware VM
93
89
  #
94
- def to_utf8(str, tounicode = nil)
90
+ def to_utf8(str)
91
+ if utf8_conversion_impossible?
92
+ little_boxes(str.unpack(unpack).size)
93
+ else
94
+ convert_to_utf8(str)
95
+ end
96
+ end
97
+
98
+ private
99
+
100
+ def utf8_conversion_impossible?
101
+ @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
102
+ end
103
+
104
+ def little_boxes(times)
105
+ codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
106
+ ret = codepoints.pack("U*")
107
+ ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
108
+ ret
109
+ end
110
+
111
+ def convert_to_utf8(str)
95
112
  ret = str.unpack(unpack).map { |c|
96
113
  differences[c] || c
97
- }.map { |num|
98
- original_codepoint_to_unicode(num, tounicode)
114
+ }.map { |c|
115
+ mapping[c] || c
99
116
  }.map { |c|
100
117
  names_to_unicode[c] || c
118
+ }.map { |c|
119
+ if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
120
+ PDF::Reader::Encoding::UNKNOWN_CHAR
121
+ else
122
+ c
123
+ end
101
124
  }.map { |c|
102
125
  if c.nil? || !c.is_a?(Fixnum)
103
126
  PDF::Reader::Encoding::UNKNOWN_CHAR
@@ -111,22 +134,6 @@ class PDF::Reader
111
134
  ret
112
135
  end
113
136
 
114
- private
115
-
116
- def original_codepoint_to_unicode(cp, tounicode = nil)
117
- if tounicode && (code = tounicode.decode(cp))
118
- code
119
- elsif to_unicode_required? && (tounicode.nil? || tounicode.decode(cp).nil?)
120
- PDF::Reader::Encoding::UNKNOWN_CHAR
121
- elsif mapping[cp]
122
- mapping[cp]
123
- elsif PDF::Reader::Encoding::CONTROL_CHARS.include?(cp)
124
- PDF::Reader::Encoding::UNKNOWN_CHAR
125
- else
126
- cp
127
- end
128
- end
129
-
130
137
  def get_unpack(enc)
131
138
  case enc
132
139
  when :"Identity-H", :"Identity-V", :UTF16Encoding
@@ -157,10 +164,6 @@ class PDF::Reader
157
164
  end
158
165
  end
159
166
 
160
- def unicode_required?(enc)
161
- enc == :"Identity-H" or enc == :"Identity-V"
162
- end
163
-
164
167
  def mapping
165
168
  @mapping ||= {}
166
169
  end
@@ -35,6 +35,7 @@ class PDF::Reader
35
35
  return
36
36
  end
37
37
  @ohash = ohash
38
+ @tounicode = nil
38
39
 
39
40
  extract_base_info(obj)
40
41
  extract_descriptor(obj)
@@ -58,24 +59,17 @@ class PDF::Reader
58
59
  end
59
60
 
60
61
  def to_utf8(params)
61
- raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
62
-
63
- if params.class == String
64
- encoding.to_utf8(params, tounicode)
65
- elsif params.class == Array
66
- params.collect { |param| self.to_utf8(param) }
62
+ if @tounicode
63
+ to_utf8_via_cmap(params)
67
64
  else
68
- params
65
+ to_utf8_via_encoding(params)
69
66
  end
70
67
  end
71
68
 
72
69
  def glyph_width(c)
73
70
  @missing_width ||= 0
74
- if @widths.nil?
75
- 0
76
- else
77
- @widths.fetch(c.codepoints.first - @first_char, @missing_width)
78
- end
71
+ @widths ||= []
72
+ @widths.fetch(c - @first_char, @missing_width)
79
73
  end
80
74
 
81
75
  private
@@ -84,7 +78,7 @@ class PDF::Reader
84
78
  @subtype = @ohash.object(obj[:Subtype])
85
79
  @basefont = @ohash.object(obj[:BaseFont])
86
80
  @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
87
- @widths = @ohash.object(obj[:Widths])
81
+ @widths = @ohash.object(obj[:Widths]) || []
88
82
  @first_char = @ohash.object(obj[:FirstChar])
89
83
  if obj[:ToUnicode]
90
84
  stream = @ohash.object(obj[:ToUnicode])
@@ -111,5 +105,29 @@ class PDF::Reader
111
105
  }
112
106
  end
113
107
 
108
+ def to_utf8_via_cmap(params)
109
+ if params.class == String
110
+ params.unpack(encoding.unpack).map { |c|
111
+ @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
112
+ }.pack("U*")
113
+ elsif params.class == Array
114
+ params.collect { |param| to_utf8_via_cmap(param) }
115
+ else
116
+ params
117
+ end
118
+ end
119
+
120
+ def to_utf8_via_encoding(params)
121
+ raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
122
+
123
+ if params.class == String
124
+ encoding.to_utf8(params)
125
+ elsif params.class == Array
126
+ params.collect { |param| to_utf8_via_encoding(param) }
127
+ else
128
+ params
129
+ end
130
+ end
131
+
114
132
  end
115
133
  end
@@ -13,6 +13,8 @@ module PDF
13
13
  class FormXObject
14
14
  include ResourceMethods
15
15
 
16
+ attr_reader :xobject
17
+
16
18
  def initialize(page, xobject)
17
19
  @page = page
18
20
  @objects = page.objects
@@ -317,7 +317,7 @@ class PDF::Reader
317
317
  if obj[:Type] == :Page
318
318
  ref
319
319
  elsif obj[:Type] == :Pages
320
- obj[:Kids].map { |kid| get_page_objects(kid) }
320
+ deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
321
321
  end
322
322
  end
323
323
 
@@ -0,0 +1,295 @@
1
+ # coding: utf-8
2
+
3
+ require 'matrix'
4
+
5
+ module PDF
6
+ class Reader
7
+ class PageState
8
+
9
+ DEFAULT_GRAPHICS_STATE = {
10
+ :ctm => Matrix.identity(3),
11
+ :char_spacing => 0,
12
+ :word_spacing => 0,
13
+ :h_scaling => 100,
14
+ :text_leading => 0,
15
+ :text_font => nil,
16
+ :text_font_size => nil,
17
+ :text_mode => 0,
18
+ :text_rise => 0,
19
+ :text_knockout => 0
20
+ }
21
+
22
+ # starting a new page
23
+ def initialize(page)
24
+ @page = page
25
+ @objects = page.objects
26
+ @font_stack = [build_fonts(page.fonts)]
27
+ @xobject_stack = [page.xobjects]
28
+ @cs_stack = [page.color_spaces]
29
+ @stack = [DEFAULT_GRAPHICS_STATE.dup]
30
+ end
31
+
32
+ #####################################################
33
+ # Graphics State Operators
34
+ #####################################################
35
+
36
+ def save_graphics_state
37
+ @stack.push clone_state
38
+ end
39
+
40
+ def restore_graphics_state
41
+ @stack.pop
42
+ end
43
+
44
+ #####################################################
45
+ # Matrix Operators
46
+ #####################################################
47
+
48
+ # update the current transformation matrix.
49
+ #
50
+ # If the CTM is currently undefined, just store the new values.
51
+ #
52
+ # If there's an existing CTM, then multiply the existing matrix
53
+ # with the new matrix to form the updated matrix.
54
+ #
55
+ def concatenate_matrix(a, b, c, d, e, f)
56
+ transform = Matrix[
57
+ [a, b, 0],
58
+ [c, d, 0],
59
+ [e, f, 1]
60
+ ]
61
+ if state[:ctm]
62
+ state[:ctm] = transform * state[:ctm]
63
+ else
64
+ state[:ctm] = transform
65
+ end
66
+ end
67
+
68
+ #####################################################
69
+ # Text Object Operators
70
+ #####################################################
71
+
72
+ def begin_text_object
73
+ @text_matrix = Matrix.identity(3)
74
+ @text_line_matrix = Matrix.identity(3)
75
+ end
76
+
77
+ def end_text_object
78
+ @text_matrix = Matrix.identity(3)
79
+ @text_line_matrix = Matrix.identity(3)
80
+ end
81
+
82
+ #####################################################
83
+ # Text State Operators
84
+ #####################################################
85
+
86
+ def set_character_spacing(char_spacing)
87
+ state[:char_spacing] = char_spacing
88
+ end
89
+
90
+ def set_horizontal_text_scaling(h_scaling)
91
+ state[:h_scaling] = h_scaling
92
+ end
93
+
94
+ def set_text_font_and_size(label, size)
95
+ state[:text_font] = label
96
+ state[:text_font_size] = size
97
+ end
98
+
99
+ def font_size
100
+ state[:text_font_size] * @text_matrix[0,0]
101
+ end
102
+
103
+ def set_text_leading(leading)
104
+ state[:text_leading] = leading
105
+ end
106
+
107
+ def set_text_rendering_mode(mode)
108
+ state[:text_mode] = mode
109
+ end
110
+
111
+ def set_text_rise(rise)
112
+ state[:text_rise] = rise
113
+ end
114
+
115
+ def set_word_spacing(word_spacing)
116
+ state[:word_spacing] = word_spacing
117
+ end
118
+
119
+ #####################################################
120
+ # Text Positioning Operators
121
+ #####################################################
122
+
123
+ def move_text_position(x, y) # Td
124
+ temp_matrix = Matrix[
125
+ [1, 0, 0],
126
+ [0, 1, 0],
127
+ [x, y, 1]
128
+ ]
129
+ @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
130
+ end
131
+
132
+ def move_text_position_and_set_leading(x, y) # TD
133
+ set_text_leading(-1 * y)
134
+ move_text_position(x, y)
135
+ end
136
+
137
+ def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
138
+ @text_matrix = @text_line_matrix = Matrix[
139
+ [a, b, 0],
140
+ [c, d, 0],
141
+ [e, f, 1]
142
+ ]
143
+ end
144
+
145
+ def move_to_start_of_next_line # T*
146
+ move_text_position(0, -state[:text_leading])
147
+ end
148
+
149
+ #####################################################
150
+ # Text Showing Operators
151
+ #####################################################
152
+
153
+ def show_text_with_positioning(params) # TJ
154
+ # TODO record position changes in state here
155
+ end
156
+
157
+ def move_to_next_line_and_show_text(str) # '
158
+ move_to_start_of_next_line
159
+ end
160
+
161
+ def set_spacing_next_line_show_text(aw, ac, string) # "
162
+ set_word_spacing(aw)
163
+ set_character_spacing(ac)
164
+ move_to_next_line_and_show_text(string)
165
+ end
166
+
167
+ #####################################################
168
+ # XObjects
169
+ #####################################################
170
+ def invoke_xobject(label)
171
+ save_graphics_state
172
+ xobject = find_xobject(label)
173
+
174
+ raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?
175
+ matrix = xobject.hash[:Matrix]
176
+ concatenate_matrix(*matrix) if matrix
177
+
178
+ if xobject.hash[:Subtype] == :Form
179
+ form = PDF::Reader::FormXObject.new(@page, xobject)
180
+ @font_stack.unshift(form.font_objects)
181
+ @xobject_stack.unshift(form.xobjects)
182
+ yield form if block_given?
183
+ @font_stack.shift
184
+ @xobject_stack.shift
185
+ else
186
+ yield xobject if block_given?
187
+ end
188
+
189
+ restore_graphics_state
190
+ end
191
+
192
+ #####################################################
193
+ # Public Visible State
194
+ #####################################################
195
+
196
+ # transform x and y co-ordinates from the current user space to the
197
+ # underlying device space.
198
+ #
199
+ def ctm_transform(x, y, z = 1)
200
+ [
201
+ (ctm[0,0] * x) + (ctm[1,0] * y) + (ctm[2,0] * z),
202
+ (ctm[0,1] * x) + (ctm[1,1] * y) + (ctm[2,1] * z)
203
+ ]
204
+ end
205
+
206
+ # transform x and y co-ordinates from the current text space to the
207
+ # underlying device space.
208
+ #
209
+ def trm_transform(x, y, z = 1)
210
+ trm = text_rendering_matrix
211
+ [
212
+ (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
213
+ (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
214
+ ]
215
+ end
216
+
217
+ def current_font
218
+ find_font(state[:text_font])
219
+ end
220
+
221
+ def find_font(label)
222
+ dict = @font_stack.detect { |fonts|
223
+ fonts.has_key?(label)
224
+ }
225
+ dict ? dict[label] : nil
226
+ end
227
+
228
+ def find_color_space(label)
229
+ dict = @cs_stack.detect { |colorspaces|
230
+ colorspaces.has_key?(label)
231
+ }
232
+ dict ? dict[label] : nil
233
+ end
234
+
235
+ def find_xobject(label)
236
+ dict = @xobject_stack.detect { |xobjects|
237
+ xobjects.has_key?(label)
238
+ }
239
+ dict ? dict[label] : nil
240
+ end
241
+
242
+ private
243
+
244
+ def text_rendering_matrix
245
+ state_matrix = Matrix[
246
+ [font_size * state[:h_scaling], 0, 0],
247
+ [0, font_size, 0],
248
+ [0, state[:text_rise], 1]
249
+ ]
250
+
251
+ state_matrix * @text_matrix * ctm
252
+ end
253
+
254
+ # return the current transformation matrix
255
+ #
256
+ def ctm
257
+ state[:ctm]
258
+ end
259
+
260
+ def state
261
+ @stack.last
262
+ end
263
+
264
+ # wrap the raw PDF Font objects in handy ruby Font objects.
265
+ #
266
+ def build_fonts(raw_fonts)
267
+ wrapped_fonts = raw_fonts.map { |label, font|
268
+ [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
269
+ }
270
+
271
+ ::Hash[wrapped_fonts]
272
+ end
273
+
274
+ # when save_graphics_state is called, we need to push a new copy of the
275
+ # current state onto the stack. That way any modifications to the state
276
+ # will be undone once restore_graphics_state is called.
277
+ #
278
+ # This returns a deep clone of the current state, ensuring changes are
279
+ # kept separate from earlier states.
280
+ #
281
+ # Marshal is used to round-trip the state through a string to easily
282
+ # perform the deep clone. Kinda hacky, but effective.
283
+ #
284
+ def clone_state
285
+ if @stack.empty?
286
+ {}
287
+ else
288
+ Marshal.load Marshal.dump(@stack.last)
289
+ end
290
+ end
291
+
292
+ end
293
+ end
294
+ end
295
+
@@ -1,156 +1,43 @@
1
1
  # coding: utf-8
2
2
 
3
3
  require 'matrix'
4
+ require 'forwardable'
4
5
 
5
6
  module PDF
6
7
  class Reader
7
8
  class PageTextReceiver
9
+ extend Forwardable
8
10
 
9
- DEFAULT_GRAPHICS_STATE = {
10
- :ctm => Matrix.identity(3),
11
- :char_spacing => 0,
12
- :word_spacing => 0,
13
- :h_scaling => 100,
14
- :text_leading => 0,
15
- :text_font => nil,
16
- :text_font_size => nil,
17
- :text_mode => 0,
18
- :text_rise => 0,
19
- :text_knockout => 0
20
- }
21
-
22
- # starting a new page
23
- def page=(page)
24
- @page = page
25
- @objects = page.objects
26
- @font_stack = [build_fonts(page.fonts)]
27
- @xobject_stack = [page.xobjects]
28
- @content = {}
29
- @stack = [DEFAULT_GRAPHICS_STATE.dup]
30
- end
31
-
32
- def content
33
- keys = @content.keys.sort.reverse
34
- keys.map { |key|
35
- @content[key]
36
- }.join("\n")
37
- end
38
-
39
- #####################################################
40
11
  # Graphics State Operators
41
- #####################################################
12
+ def_delegators :@state, :save_graphics_state, :restore_graphics_state
42
13
 
43
- def save_graphics_state
44
- @stack.push clone_state
45
- end
46
-
47
- def restore_graphics_state
48
- @stack.pop
49
- end
50
-
51
- #####################################################
52
14
  # Matrix Operators
53
- #####################################################
15
+ def_delegators :@state, :concatenate_matrix
54
16
 
55
- # update the current transformation matrix.
56
- #
57
- # If the CTM is currently undefined, just store the new values.
58
- #
59
- # If there's an existing CTM, then multiply the existing matrix
60
- # with the new matrix to form the updated matrix.
61
- #
62
- def concatenate_matrix(a, b, c, d, e, f)
63
- transform = Matrix[
64
- [a, b, 0],
65
- [c, d, 0],
66
- [e, f, 1]
67
- ]
68
- if state[:ctm]
69
- state[:ctm] = transform * state[:ctm]
70
- else
71
- state[:ctm] = transform
72
- end
73
- end
74
-
75
- #####################################################
76
17
  # Text Object Operators
77
- #####################################################
18
+ def_delegators :@state, :begin_text_object, :end_text_object
78
19
 
79
- def begin_text_object
80
- @text_matrix = Matrix.identity(3)
81
- @text_line_matrix = Matrix.identity(3)
82
- end
83
-
84
- def end_text_object
85
- @text_matrix = Matrix.identity(3)
86
- @text_line_matrix = Matrix.identity(3)
87
- end
88
-
89
- #####################################################
90
20
  # Text State Operators
91
- #####################################################
92
-
93
- def set_character_spacing(char_spacing)
94
- state[:char_spacing] = char_spacing
95
- end
21
+ def_delegators :@state, :set_character_spacing, :set_horizontal_text_scaling
22
+ def_delegators :@state, :set_text_font_and_size, :font_size
23
+ def_delegators :@state, :set_text_leading, :set_text_rendering_mode
24
+ def_delegators :@state, :set_text_rise, :set_word_spacing
96
25
 
97
- def set_horizontal_text_scaling(h_scaling)
98
- state[:h_scaling] = h_scaling
99
- end
100
-
101
- def set_text_font_and_size(label, size)
102
- state[:text_font] = label
103
- state[:text_font_size] = size
104
- end
105
-
106
- def font_size
107
- state[:text_font_size] * @text_matrix[0,0]
108
- end
109
-
110
- def set_text_leading(leading)
111
- state[:text_leading] = leading
112
- end
113
-
114
- def set_text_rendering_mode(mode)
115
- state[:text_mode] = mode
116
- end
117
-
118
- def set_text_rise(rise)
119
- state[:text_rise] = rise
120
- end
121
-
122
- def set_word_spacing(word_spacing)
123
- state[:word_spacing] = word_spacing
124
- end
125
-
126
- #####################################################
127
26
  # Text Positioning Operators
128
- #####################################################
27
+ def_delegators :@state, :move_text_position, :move_text_position_and_set_leading
28
+ def_delegators :@state, :set_text_matrix_and_text_line_matrix, :move_to_start_of_next_line
129
29
 
130
- def move_text_position(x, y) # Td
131
- temp_matrix = Matrix[
132
- [1, 0, 0],
133
- [0, 1, 0],
134
- [x, y, 1]
135
- ]
136
- @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
137
- end
138
-
139
- def move_text_position_and_set_leading(x, y) # TD
140
- set_text_leading(-1 * y)
141
- move_text_position(x, y)
142
- end
143
-
144
- def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
145
- @text_matrix = @text_line_matrix = Matrix[
146
- [a, b, 0],
147
- [c, d, 0],
148
- [e, f, 1]
149
- ]
30
+ # starting a new page
31
+ def page=(page)
32
+ @state = PageState.new(page)
33
+ @content = {}
150
34
  end
151
35
 
152
- def move_to_start_of_next_line # T*
153
- move_text_position(0, -state[:text_leading])
36
+ def content
37
+ keys = @content.keys.sort.reverse
38
+ keys.map { |key|
39
+ @content[key]
40
+ }.join("\n")
154
41
  end
155
42
 
156
43
  #####################################################
@@ -159,10 +46,10 @@ module PDF
159
46
 
160
47
  # record text that is drawn on the page
161
48
  def show_text(string) # Tj
162
- raise PDF::Reader::MalformedPDFError, "current font is invalid" if current_font.nil?
163
- at = transform(Point.new(0,0))
164
- @content[at.y] ||= ""
165
- @content[at.y] << current_font.to_utf8(string)
49
+ raise PDF::Reader::MalformedPDFError, "current font is invalid" if @state.current_font.nil?
50
+ newx, newy = @state.trm_transform(0,0)
51
+ @content[newy] ||= ""
52
+ @content[newy] << @state.current_font.to_utf8(string)
166
53
  end
167
54
 
168
55
  def show_text_with_positioning(params) # TJ
@@ -177,13 +64,13 @@ module PDF
177
64
  end
178
65
 
179
66
  def move_to_next_line_and_show_text(str) # '
180
- move_to_start_of_next_line
67
+ @state.move_to_start_of_next_line
181
68
  show_text(str)
182
69
  end
183
70
 
184
71
  def set_spacing_next_line_show_text(aw, ac, string) # "
185
- set_word_spacing(aw)
186
- set_character_spacing(ac)
72
+ @state.set_word_spacing(aw)
73
+ @state.set_character_spacing(ac)
187
74
  move_to_next_line_and_show_text(string)
188
75
  end
189
76
 
@@ -191,103 +78,14 @@ module PDF
191
78
  # XObjects
192
79
  #####################################################
193
80
  def invoke_xobject(label)
194
- save_graphics_state
195
- dict = @xobject_stack.detect { |xobjects|
196
- xobjects.has_key?(label)
197
- }
198
- xobject = dict ? dict[label] : nil
199
-
200
- raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?
201
- matrix = xobject.hash[:Matrix]
202
- concatenate_matrix(*matrix) if matrix
203
-
204
- if xobject.hash[:Subtype] == :Form
205
- form = PDF::Reader::FormXObject.new(@page, xobject)
206
- @font_stack.unshift(form.font_objects)
207
- @xobject_stack.unshift(form.xobjects)
208
- form.walk(self)
209
- @font_stack.shift
210
- @xobject_stack.shift
211
- end
212
-
213
- restore_graphics_state
214
- end
215
-
216
- private
217
-
218
- # wrap the raw PDF Font objects in handy ruby Font objects.
219
- #
220
- def build_fonts(raw_fonts)
221
- wrapped_fonts = raw_fonts.map { |label, font|
222
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
223
- }
224
-
225
- ::Hash[wrapped_fonts]
226
- end
227
-
228
- # transform x and y co-ordinates from the current text space to the
229
- # underlying device space.
230
- #
231
- def transform(point, z = 1)
232
- point.transform(text_rendering_matrix, z)
233
- end
234
-
235
- def text_rendering_matrix
236
- state_matrix = Matrix[
237
- [font_size * state[:h_scaling], 0, 0],
238
- [0, font_size, 0],
239
- [0, state[:text_rise], 1]
240
- ]
241
-
242
- state_matrix * @text_matrix * ctm
243
- end
244
-
245
- def state
246
- @stack.last
247
- end
248
-
249
- # when save_graphics_state is called, we need to push a new copy of the
250
- # current state onto the stack. That way any modifications to the state
251
- # will be undone once restore_graphics_state is called.
252
- #
253
- # This returns a deep clone of the current state, ensuring changes are
254
- # keep separate from earlier states.
255
- #
256
- # Marshal is used to round-trip the state through a string to easily
257
- # perform the deep clone. Kinda hacky, but effective.
258
- #
259
- def clone_state
260
- if @stack.empty?
261
- {}
262
- else
263
- Marshal.load Marshal.dump(@stack.last)
81
+ @state.invoke_xobject(label) do |xobj|
82
+ case xobj
83
+ when PDF::Reader::FormXObject then
84
+ xobj.walk(self)
85
+ end
264
86
  end
265
87
  end
266
88
 
267
- # return the current transformation matrix
268
- #
269
- def ctm
270
- state[:ctm]
271
- end
272
-
273
- def current_font
274
- dict = @font_stack.detect { |fonts|
275
- fonts.has_key?(state[:text_font])
276
- }
277
- dict ? dict[state[:text_font]] : nil
278
- end
279
-
280
- # private class for representing points on a cartesian plain. Used
281
- # to simplify maths.
282
- #
283
- class Point < Struct.new(:x, :y)
284
- def transform(trm, z)
285
- Point.new(
286
- (trm[0,0] * x) + (trm[1,0] * y) + (trm[2,0] * z),
287
- (trm[0,1] * x) + (trm[1,1] * y) + (trm[2,1] * z)
288
- )
289
- end
290
- end
291
89
  end
292
90
  end
293
91
  end
@@ -130,6 +130,7 @@ class PDF::Reader
130
130
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
131
131
  def pdf_name
132
132
  tok = @buffer.token
133
+ tok = " " if tok == "" && RUBY_VERSION < "1.9"
133
134
  tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
134
135
  match[1, 2].hex.chr
135
136
  end
@@ -8,12 +8,20 @@ module PDF
8
8
  module ResourceMethods
9
9
  # Returns a Hash of color spaces that are available to this page
10
10
  #
11
+ # NOTE: this method de-serialises objects from the underlying PDF
12
+ # with no caching. You will want to cache the results instead
13
+ # of calling it over and over.
14
+ #
11
15
  def color_spaces
12
16
  @objects.deref!(resources[:ColorSpace]) || {}
13
17
  end
14
18
 
15
19
  # Returns a Hash of fonts that are available to this page
16
20
  #
21
+ # NOTE: this method de-serialises objects from the underlying PDF
22
+ # with no caching. You will want to cache the results instead
23
+ # of calling it over and over.
24
+ #
17
25
  def fonts
18
26
  @objects.deref!(resources[:Font]) || {}
19
27
  end
@@ -21,36 +29,60 @@ module PDF
21
29
  # Returns a Hash of external graphic states that are available to this
22
30
  # page
23
31
  #
32
+ # NOTE: this method de-serialises objects from the underlying PDF
33
+ # with no caching. You will want to cache the results instead
34
+ # of calling it over and over.
35
+ #
24
36
  def graphic_states
25
37
  @objects.deref!(resources[:ExtGState]) || {}
26
38
  end
27
39
 
28
40
  # Returns a Hash of patterns that are available to this page
29
41
  #
42
+ # NOTE: this method de-serialises objects from the underlying PDF
43
+ # with no caching. You will want to cache the results instead
44
+ # of calling it over and over.
45
+ #
30
46
  def patterns
31
47
  @objects.deref!(resources[:Pattern]) || {}
32
48
  end
33
49
 
34
50
  # Returns an Array of procedure sets that are available to this page
35
51
  #
52
+ # NOTE: this method de-serialises objects from the underlying PDF
53
+ # with no caching. You will want to cache the results instead
54
+ # of calling it over and over.
55
+ #
36
56
  def procedure_sets
37
57
  @objects.deref!(resources[:ProcSet]) || []
38
58
  end
39
59
 
40
60
  # Returns a Hash of properties sets that are available to this page
41
61
  #
62
+ # NOTE: this method de-serialises objects from the underlying PDF
63
+ # with no caching. You will want to cache the results instead
64
+ # of calling it over and over.
65
+ #
42
66
  def properties
43
67
  @objects.deref!(resources[:Properties]) || {}
44
68
  end
45
69
 
46
70
  # Returns a Hash of shadings that are available to this page
47
71
  #
72
+ # NOTE: this method de-serialises objects from the underlying PDF
73
+ # with no caching. You will want to cache the results instead
74
+ # of calling it over and over.
75
+ #
48
76
  def shadings
49
77
  @objects.deref!(resources[:Shading]) || {}
50
78
  end
51
79
 
52
80
  # Returns a Hash of XObjects that are available to this page
53
81
  #
82
+ # NOTE: this method de-serialises objects from the underlying PDF
83
+ # with no caching. You will want to cache the results instead
84
+ # of calling it over and over.
85
+ #
54
86
  def xobjects
55
87
  @objects.deref!(resources[:XObject]) || {}
56
88
  end
@@ -124,7 +124,7 @@ class PDF::Reader
124
124
  generation = buf.token.to_i
125
125
  state = buf.token
126
126
 
127
- store(objid, generation, offset) if state == "n"
127
+ store(objid, generation, offset) if state == "n" && offset > 0
128
128
  objid += 1
129
129
  params.clear
130
130
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-16 00:00:00.000000000 Z
12
+ date: 2012-03-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &24844240 !ruby/object:Gem::Requirement
16
+ requirement: &20774400 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *24844240
24
+ version_requirements: *20774400
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: roodi
27
- requirement: &24843780 !ruby/object:Gem::Requirement
27
+ requirement: &20772820 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *24843780
35
+ version_requirements: *20772820
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &24843280 !ruby/object:Gem::Requirement
38
+ requirement: &20771660 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '2.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *24843280
46
+ version_requirements: *20771660
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: ZenTest
49
- requirement: &24842780 !ruby/object:Gem::Requirement
49
+ requirement: &20770840 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 4.4.2
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *24842780
57
+ version_requirements: *20770840
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: Ascii85
60
- requirement: &24842320 !ruby/object:Gem::Requirement
60
+ requirement: &20770080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *24842320
68
+ version_requirements: *20770080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: ruby-rc4
71
- requirement: &24841940 !ruby/object:Gem::Requirement
71
+ requirement: &20769400 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *24841940
79
+ version_requirements: *20769400
80
80
  description: The PDF::Reader library implements a PDF parser conforming as much as
81
81
  possible to the PDF specification from Adobe
82
82
  email:
@@ -114,6 +114,7 @@ files:
114
114
  - lib/pdf/reader/filter.rb
115
115
  - lib/pdf/reader/object_hash.rb
116
116
  - lib/pdf/reader/stream.rb
117
+ - lib/pdf/reader/page_state.rb
117
118
  - lib/pdf/reader/standard_security_handler.rb
118
119
  - lib/pdf/reader/cmap.rb
119
120
  - lib/pdf/reader/form_xobject.rb