ruby-rtf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
module RubyRTF
  # Error signalling that the input handed to the parser is not valid RTF.
  # It descends from RuntimeError, so a bare +rescue+ still catches it.
  InvalidDocument = Class.new(RuntimeError)
end
@@ -0,0 +1,492 @@
1
+ # encoding: utf-8
2
+
3
+ module RubyRTF
4
+ # Handles the parsing of RTF content into an RubyRTF::Document
5
+ class Parser
6
+ attr_accessor :current_section
7
+
8
+ # @return [Array] The current formatting block to use as the basis for new sections
9
+ attr_reader :formatting_stack
10
+
11
+ attr_reader :doc
12
+
13
# Sets up an empty parser: a fresh document, no open table contexts and a
# single (empty) base formatting hash.
#
# The base formatting hash is deliberately shared between the bottom of the
# formatting stack and the first section, so modifiers merged into the first
# section also become the base formatting.
def initialize
  @doc = RubyRTF::Document.new
  @context_stack = []
  @seen = {}

  base_modifiers = {}
  @current_section = {:text => '', :modifiers => base_modifiers}
  @formatting_stack = [base_modifiers]
end
23
+
24
# Turns an RTF string into a RubyRTF::Document
#
# The source is walked one character at a time: a backslash starts a control
# (which may consume more of the source), braces open and close groups,
# carriage returns and newlines are dropped and everything else is text of
# the current section.
#
# @param src [String] The document to parse
# @return [RubyRTF::Document] The document built up from src
# @raise [RubyRTF::InvalidDocument] Raised if the \rtf1 header is missing or the braces do not balance
def parse(src)
  raise RubyRTF::InvalidDocument, "Opening \\rtf1 missing" unless src =~ /\{\\rtf1/

  depth = 0
  pos = 0
  total = src.length

  until pos >= total
    char = src[pos]
    pos += 1

    if char == '\\'
      name, val, pos = parse_control(src, pos)
      pos = handle_control(name, val, src, pos)
    elsif char == '{'
      add_section!
      depth += 1
    elsif char == '}'
      pop_formatting!
      add_section!
      depth -= 1
    elsif char != "\r" && char != "\n"
      current_section[:text] << char
    end
  end

  # flush whatever text is still pending once the source is exhausted
  current_context << current_section unless current_section[:text].empty?

  raise RubyRTF::InvalidDocument, "Unbalanced {}s" unless depth == 0
  @doc
end
66
+
67
# Characters that end the scan for a control word
STOP_CHARS = [' ', '\\', '{', '}', "\r", "\n", ';'].freeze

# Parses a control switch
#
# Three forms are recognised:
# * a hex escape (\'hh), returned as :hex with the decoded single byte as value
# * a control symbol (a backslash followed by one character that is neither a
#   letter nor *), returned under the name of that character, e.g. :~ or :'{'
# * a control word with an optional (possibly negative) numeric parameter; a
#   single space following the word is consumed as its delimiter
#
# @param src [String] The fragment to parse
# @param current_pos [Integer] The position in string the control starts at (after the \)
# @return [Array] The name (Symbol), the optional value (Integer, a String for :hex, or nil)
#   and the new current position
# @raise [RubyRTF::InvalidDocument] Raised if src ends directly after the backslash
#
# @api private
def parse_control(src, current_pos = 0)
  start = current_pos
  max_len = src.length

  # a backslash as the very last character has nothing left to introduce
  if start >= max_len
    raise RubyRTF::InvalidDocument.new("Control missing after \\ at end of document")
  end

  lead = src[start]

  # handle hex special
  # NOTE(review): the byte is returned as-is (binary encoded); it is not
  # converted from the document's code page here.
  return [:hex, src[start + 1, 2].hex.chr, start + 3] if lead == "'"

  # Anything that cannot start a control word is a one character control
  # symbol. Scanning on to the next stop character would swallow the text
  # that follows it (\~b must not be read as \b). Plain comparisons are used
  # instead of a regexp so oddly encoded bytes cannot raise here.
  word_start = lead == '*' || ('a'..'z').cover?(lead) || ('A'..'Z').cover?(lead)
  return [lead.to_sym, nil, start + 1] unless word_start

  current_pos += 1 until current_pos >= max_len || STOP_CHARS.include?(src[current_pos])

  contents = src[start, current_pos - start]
  m = contents.match(/([\*a-z]+)(\-?\d+)?\*?/)

  # fall back to the raw contents when nothing matches (e.g. an all upper case word)
  ctrl = m ? m[1].to_sym : contents.to_sym
  val = m[2].to_i if m && m[2]

  # we advance past the optional space if present
  current_pos += 1 if src[current_pos] == ' '

  [ctrl, val, current_pos]
end
108
+
109
# Handle a given control
#
# Applies one parsed control to the parser state: document level settings are
# stored on @doc, the header groups (font table, colour table, stylesheet,
# info and \* groups) are handed to their own parse methods, formatting
# controls open a new section or extend the current one, and the table
# controls maintain the stack of cell contexts. Controls that are not known
# are reported once per name on STDERR and otherwise ignored.
#
# @param name [Symbol] The control name
# @param val [Integer, String, nil] The control's value (the decoded character
#   for :hex), or nil if none was given
# @param src [String] The source document
# @param current_pos [Integer] The current document position
# @return [Integer] The new current position
# @raise [RuntimeError] Raised if trgaph, trleft or cellx appear outside of a table
#
# @api private
def handle_control(name, val, src, current_pos)
  # Toggle controls (\b, \i, ...) switch the property off again when they
  # carry an explicit 0 parameter (\b0); without a parameter they switch it on.
  enabled = (val != 0)

  case name
  when :rtf then nil
  when :deff then @doc.default_font = val
  when :ansi, :mac, :pc, :pca then @doc.character_set = name
  when :fonttbl then current_pos = parse_font_table(src, current_pos)
  when :colortbl then current_pos = parse_colour_table(src, current_pos)
  when :stylesheet then current_pos = parse_stylesheet(src, current_pos)
  when :info then current_pos = parse_info(src, current_pos)
  when :* then current_pos = parse_skip(src, current_pos)

  when :f then add_section!(:font => @doc.font_table[val])

  # RTF font sizes are in half-points. divide by 2 to get points
  when :fs then add_section!(:font_size => (val.to_f / 2.0))
  when :b then add_section!(:bold => enabled)
  when :i then add_section!(:italic => enabled)
  when :ul then add_section!(:underline => enabled)
  when :super then add_section!(:superscript => true)
  when :sub then add_section!(:subscript => true)
  when :strike then add_section!(:strikethrough => enabled)
  when :scaps then add_section!(:smallcaps => enabled)
  when :ql then add_section!(:justification => :left)
  when :qr then add_section!(:justification => :right)
  when :qj then add_section!(:justification => :full)
  when :qc then add_section!(:justification => :center)
  when :fi then add_section!(:first_line_indent => RubyRTF.twips_to_points(val))
  when :li then add_section!(:left_indent => RubyRTF.twips_to_points(val))
  when :ri then add_section!(:right_indent => RubyRTF.twips_to_points(val))
  when :margl then add_section!(:left_margin => RubyRTF.twips_to_points(val))
  when :margr then add_section!(:right_margin => RubyRTF.twips_to_points(val))
  when :margt then add_section!(:top_margin => RubyRTF.twips_to_points(val))
  when :margb then add_section!(:bottom_margin => RubyRTF.twips_to_points(val))
  when :sb then add_section!(:space_before => RubyRTF.twips_to_points(val))
  when :sa then add_section!(:space_after => RubyRTF.twips_to_points(val))
  when :cf then add_section!(:foreground_colour => @doc.colour_table[val])
  when :cb then add_section!(:background_colour => @doc.colour_table[val])
  when :hex then current_section[:text] << val

  when :u
    # \uN carries the code point as a signed 16 bit *decimal* number, so
    # negative values wrap around by 65536. The character is built directly
    # from the code point; nothing from the document is evaluated as code.
    # A \u without a parameter has no character to add and is ignored.
    unless val.nil?
      code_point = val < 0 ? val + 65_536 : val
      current_section[:text] << [code_point].pack('U')
    end

  when :rquote, :lquote then add_modifier_section({name => true}, "'")
  when :rdblquote, :ldblquote then add_modifier_section({name => true}, '"')

  when :'{' then current_section[:text] << "{"
  when :'}' then current_section[:text] << "}"
  when :'\\' then current_section[:text] << '\\'

  when :~ then add_modifier_section({:nbsp => true}, " ")

  when :tab then add_modifier_section({:tab => true}, "\t")
  when :emdash then add_modifier_section({:emdash => true}, "--")
  when :endash then add_modifier_section({:endash => true}, "-")

  when :line, :"\n" then add_modifier_section({:newline => true}, "\n")
  when :"\r" then nil

  when :par then add_modifier_section({:paragraph => true})
  when :pard, :plain then reset_current_section!

  when :trowd
    # a row directly following a table section extends that table,
    # otherwise a new table is started
    table = nil
    table = doc.sections.last[:modifiers][:table] if doc.sections.last && doc.sections.last[:modifiers][:table]
    if table
      table.add_row
    else
      table = RubyRTF::Table.new

      if !current_section[:text].empty?
        force_section!({:table => table})
      else
        current_section[:modifiers][:table] = table
        pop_formatting!
      end

      force_section!
      pop_formatting!
    end

    # everything up to the next \cell is collected into the row's current cell
    @context_stack.push(table.current_row.current_cell)

  when :trgaph
    raise "trgaph outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.half_gap = RubyRTF.twips_to_points(val)

  when :trleft
    raise "trleft outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.left_margin = RubyRTF.twips_to_points(val)

  when :cellx
    raise "cellx outside of a table?" if !current_context.respond_to?(:row)
    current_context.row.end_positions.push(RubyRTF.twips_to_points(val))

  when :intbl then nil

  when :cell
    pop_formatting!

    # remember the table before the cell context is popped below
    table = current_context.table if current_context.respond_to?(:table)

    force_section! #unless current_section[:text].empty?
    reset_current_section!

    @context_stack.pop

    # only add a cell if the row isn't full already
    if table && table.current_row && (table.current_row.cells.length < table.current_row.end_positions.length)
      cell = table.current_row.add_cell
      @context_stack.push(cell)
    end

  when :row
    if current_context.sections.empty?
      # empty row
      table = current_context.table
      table.rows.pop

      @context_stack.pop
    end

  else
    # report every unknown control only once
    unless @seen[name]
      @seen[name] = true
      STDERR.puts "Unknown control #{name.inspect} with #{val} at #{current_pos}"
    end
  end
  current_pos
end
253
+
254
# Parses the font table group
#
# Every font is collected into a RubyRTF::Font and stored in the document's
# font table under its font number once its group is closed. The falt, fname
# and panose controls redirect the text that follows them into the matching
# attribute until the enclosing group ends.
#
# An empty table and stray characters outside of any font are tolerated. If
# the source ends before the table is closed the scan simply stops at the end
# of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position: the brace closing the table, or
#   the end of src when the table is never closed
#
# @api private
def parse_font_table(src, current_pos)
  group = 1
  max_len = src.length

  font = nil
  in_extra = nil

  while current_pos < max_len
    case src[current_pos]
    when '{'
      font = RubyRTF::Font.new if group == 1
      in_extra = nil

      group += 1

    when '}'
      group -= 1

      # font is nil for an empty table and after a font has been stored
      if group <= 1 && font
        font.cleanup_names
        @doc.font_table[font.number] = font

        # the font is complete; characters between the font groups must not
        # be appended to its (already cleaned up) name
        font = nil
      end

      in_extra = nil

      break if group == 0

    when '\\'
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      # fonts that are not wrapped in a group of their own start with a control
      font = RubyRTF::Font.new if font.nil?

      case ctrl
      when :f then font.number = val
      when :fprq then font.pitch = val
      when :fcharset then font.character_set = val
      when :flomajor, :fhimajor, :fdbmajor, :fbimajor,
           :flominor, :fhiminor, :fdbminor, :fbiminor
        font.theme = ctrl.to_s[1..-1].to_sym

      when :falt, :fname, :panose then in_extra = ctrl
      else
        # the font family controls are the family name prefixed with an f
        cmd = ctrl.to_s[1..-1].to_sym
        if RubyRTF::Font::FAMILIES.include?(cmd)
          font.family_command = cmd
        end
      end

      # need to next as parse_control will leave current_pos at the
      # next character already so current_pos += 1 below would move us too far
      next

    when "\r", "\n" then nil

    else
      # text outside of any font has nothing to be attached to
      unless font.nil?
        case in_extra
        when :falt then font.alternate_name << src[current_pos]
        when :panose then font.panose << src[current_pos]
        when :fname then font.non_tagged_name << src[current_pos]
        when nil then font.name << src[current_pos]
        end
      end
    end
    current_pos += 1
  end

  current_pos
end
325
+
326
# Parses the colour table group
#
# Each entry is a run of colour controls terminated by a semicolon. A
# semicolon at the very start of the table is the "use default colour" entry.
# Characters without a meaning of their own (newlines, padding spaces, ...)
# are skipped, and the scan stops at the end of the source if the table is
# never closed.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position: the brace closing the table, or
#   the end of src when the table is never closed
#
# @api private
def parse_colour_table(src, current_pos)
  if src[current_pos] == ';'
    colour = RubyRTF::Colour.new
    colour.use_default = true

    @doc.colour_table << colour

    current_pos += 1
  end

  colour = RubyRTF::Colour.new
  max_len = src.length

  while current_pos < max_len
    case src[current_pos]
    when '\\'
      # parse_control already leaves current_pos on the next unread character
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      case ctrl
      when :red then colour.red = val
      when :green then colour.green = val
      when :blue then colour.blue = val
      when :ctint then colour.tint = val
      when :cshade then colour.shade = val
      when :cmaindarkone, :cmainlightone, :cmaindarktwo, :cmainlighttwo, :caccentone,
           :caccenttwo, :caccentthree, :caccentfour, :caccentfive, :caccentsix,
           :chyperlink, :cfollowedhyperlink, :cbackgroundone, :ctextone,
           :cbackgroundtwo, :ctexttwo
        colour.theme = ctrl.to_s[1..-1].to_sym
      end

    when ';'
      # a semicolon completes the current entry and starts the next one
      @doc.colour_table << colour

      colour = RubyRTF::Colour.new
      current_pos += 1

    when '}' then break

    else
      # every other character has to be stepped over, otherwise the loop
      # would never move past it
      current_pos += 1
    end
  end

  current_pos
end
376
+
377
# Skips over the stylesheet group; its contents are not interpreted
#
# Nested groups are skipped along with it. Braces escaped with a backslash
# (\{ and \}) are text, not group delimiters, so they are not counted. If the
# group is never closed the scan stops at the end of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position (inside the group)
# @return [Integer] The position of the brace closing the group, or
#   src.length when the group is never closed
#
# @api private
def parse_stylesheet(src, current_pos)
  group = 1
  max_len = src.length

  while current_pos < max_len
    case src[current_pos]
    when '\\' then current_pos += 1 # step over the escaped character as well
    when '{' then group += 1
    when '}'
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have moved us one past the end
  [current_pos, max_len].min
end
398
+
399
# Skips over the info group; its contents are not interpreted
#
# Nested groups are skipped along with it. Braces escaped with a backslash
# (\{ and \}) are text, not group delimiters, so they are not counted. If the
# group is never closed the scan stops at the end of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position (inside the group)
# @return [Integer] The position of the brace closing the group, or
#   src.length when the group is never closed
#
# @api private
def parse_info(src, current_pos)
  group = 1
  max_len = src.length

  while current_pos < max_len
    case src[current_pos]
    when '\\' then current_pos += 1 # step over the escaped character as well
    when '{' then group += 1
    when '}'
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have moved us one past the end
  [current_pos, max_len].min
end
420
+
421
# Skips over a group whose contents are to be ignored (used for \* groups)
#
# Nested groups are skipped along with it. Braces escaped with a backslash
# (\{ and \}) are text, not group delimiters, so they are not counted. If the
# group is never closed the scan stops at the end of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position (inside the group)
# @return [Integer] The position of the brace closing the group, or
#   src.length when the group is never closed
#
# @api private
def parse_skip(src, current_pos)
  group = 1
  max_len = src.length

  while current_pos < max_len
    case src[current_pos]
    when '\\' then current_pos += 1 # step over the escaped character as well
    when '{' then group += 1
    when '}'
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have moved us one past the end
  [current_pos, max_len].min
end
442
+
443
# Emits a self-contained section carrying the given modifiers (and optional
# text), then immediately opens a fresh section after it. The formatting
# pushed for each of the two sections is popped again straight away.
#
# @param mods [Hash] The modifiers of the section
# @param text [String, nil] The text of the section, if any
def add_modifier_section(mods = {}, text = nil)
  open_and_pop = lambda do |section_mods, section_text|
    force_section!(section_mods, section_text)
    pop_formatting!
  end

  open_and_pop.call(mods, text)
  open_and_pop.call({}, nil)
end
450
+
451
# Applies modifiers to what follows. While the current section has no text
# yet the modifiers are merged into it; once it holds text a new section is
# started with them instead.
#
# @param mods [Hash] The modifiers to apply
def add_section!(mods = {})
  return force_section!(mods) unless current_section[:text].empty?

  current_section[:modifiers].merge!(mods)
end
458
+
459
# Keys that aren't inherited
BLACKLISTED = [:paragraph, :newline, :tab, :lquote, :rquote, :ldblquote, :rdblquote].freeze

# Closes the current section and starts a new one
#
# The current section is appended to the current context. The new section
# inherits the formatting on top of the formatting stack (except for the
# BLACKLISTED keys), but modifiers passed in explicitly take precedence over
# inherited ones, so that e.g. a second font size replaces the first one
# instead of being discarded. The resulting modifiers are pushed onto the
# formatting stack; the very same hash is used by the new section.
#
# @param mods [Hash] The modifiers of the new section (modified in place)
# @param text [String, nil] The initial text of the new section
# @return [Hash] The new current section
def force_section!(mods = {}, text = nil)
  current_context << @current_section

  formatting_stack.last.each_pair do |k, v|
    next if BLACKLISTED.include?(k)
    # inherited values are only defaults for whatever was not given explicitly
    next if mods.key?(k)
    mods[k] = v
  end
  formatting_stack.push(mods)

  @current_section = {:text => (text || ''), :modifiers => mods}
end
472
+
473
# Drops every modifier of the current section, returning it to plain
# formatting. The modifiers hash is emptied in place, so anything sharing
# that hash sees the change as well.
#
# @return [Hash] The (now empty) modifiers hash
def reset_current_section!
  modifiers = current_section[:modifiers]
  modifiers.clear
end
479
+
480
# The object new sections are appended to: the innermost open context if
# there is one, the document itself otherwise.
def current_context
  innermost = @context_stack.last
  innermost ? innermost : doc
end
483
+
484
# Removes the top element from the formatting stack.
# @note The bottom element (the default formatting) is never removed
#
# @return [Hash, nil] The removed formatting, or nil if only the default was left
def pop_formatting!
  return if formatting_stack.length <= 1

  formatting_stack.pop
end
491
+ end
492
+ end