ruby-rtf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
module RubyRTF
  # Error type signalling that the text handed to the parser is not valid RTF.
  class InvalidDocument < RuntimeError
  end
end
@@ -0,0 +1,492 @@
1
+ # encoding: utf-8
2
+
3
+ module RubyRTF
4
+ # Handles the parsing of RTF content into a RubyRTF::Document
5
+ class Parser
6
+ attr_accessor :current_section
7
+
8
+ # @return [Array] The current formatting block to use as the basis for new sections
9
+ attr_reader :formatting_stack
10
+
11
+ attr_reader :doc
12
+
13
# Prepares an empty document together with the bookkeeping used during a parse.
#
# The bottom entry of the formatting stack and the modifiers of the initial
# section are deliberately the same Hash object.
def initialize
  @doc = RubyRTF::Document.new
  @context_stack = []
  @seen = {}

  base_modifiers = {}
  @current_section = {:text => '', :modifiers => base_modifiers}
  @formatting_stack = [base_modifiers]
end
23
+
24
# Converts RTF source text into a RubyRTF::Document.
#
# Walks the source one character at a time: backslashes are handed to
# parse_control/handle_control, braces open and close groups, carriage
# returns and line feeds are dropped, and everything else is appended to the
# text of the current section.
#
# @param src [String] The document to parse
# @return [RubyRTF::Document] The document built up from src
# @raise [RubyRTF::InvalidDocument] Raised if the opening \rtf1 is missing or
#   the group braces do not balance
def parse(src)
  unless src =~ /\{\\rtf1/
    raise RubyRTF::InvalidDocument.new("Opening \\rtf1 missing")
  end

  position = 0
  total = src.length
  depth = 0

  until position >= total
    char = src[position]
    position += 1

    if char == '\\'
      name, val, position = parse_control(src, position)
      position = handle_control(name, val, src, position)
    elsif char == '{'
      add_section!
      depth += 1
    elsif char == '}'
      pop_formatting!
      add_section!
      depth -= 1
    elsif char != "\r" && char != "\n"
      current_section[:text] << char
    end
  end

  # flush whatever text is still pending
  current_context << current_section unless current_section[:text].empty?

  raise RubyRTF::InvalidDocument.new("Unbalanced {}s") if depth != 0
  @doc
end
66
+
67
# Characters that end the scan of a control word
STOP_CHARS = [' ', '\\', '{', '}', "\r", "\n", ';'].freeze

# Parses a control switch
#
# Three shapes are recognised:
# * the hex escape \'hh, returned as :hex with the decoded single character
# * a control symbol (backslash followed by one non-alphabetic character such
#   as \~, \*, \{ or \\), returned as a one character Symbol with no value
# * a control word with an optional numeric argument, e.g. \fs24
#
# @param src [String] The fragment to parse
# @param current_pos [Integer] The position in string the control starts at (after the \)
# @return [String, String|Integer, Integer] The name, optional control value and the new current position
# @raise [RubyRTF::InvalidDocument] Raised if the source ends right after the backslash
#
# @api private
def parse_control(src, current_pos = 0)
  first = src[current_pos]

  if first.nil?
    raise RubyRTF::InvalidDocument.new("Unexpected end of document after \\")
  end

  # handle hex special
  if first == "'"
    val = src[(current_pos + 1), 2].hex.chr
    return [:hex, val, current_pos + 3]
  end

  # A control symbol is exactly one non-alphabetic character and takes no
  # delimiter, so nothing after it is consumed.
  return [first.to_sym, nil, current_pos + 1] unless first =~ /[A-Za-z]/

  start = current_pos
  max_len = src.length
  current_pos += 1 while current_pos < max_len && !STOP_CHARS.include?(src[current_pos])

  contents = src[start, current_pos - start]
  m = contents.match(/([\*a-z]+)(\-?\d+)?\*?/)

  if m.nil?
    # Nothing the pattern understands (e.g. an all upper case word): hand the
    # raw word back so the caller can treat it as an unknown control.
    ctrl = contents.to_sym
    val = nil
  else
    ctrl = m[1].to_sym
    val = m[2].nil? ? nil : m[2].to_i
  end

  # we advance past the optional space if present
  current_pos += 1 if src[current_pos] == ' '

  [ctrl, val, current_pos]
end
108
+
109
# Handle a given control
#
# Dispatches on the control name: header tables are delegated to their
# dedicated parsers (which move the position forward), formatting controls
# start or amend a section, character controls append text, and table controls
# maintain the context stack. Controls that are not recognised are reported
# once each on STDERR and otherwise ignored.
#
# @param name [Symbol] The control name
# @param val [Integer|nil] The controls value, or nil if non associated
# @param src [String] The source document
# @param current_pos [Integer] The current document position
# @return [Integer] The new current position
#
# @api private
def handle_control(name, val, src, current_pos)
  case name
  when :rtf
    # document header marker, nothing to record
  when :deff then @doc.default_font = val
  when :ansi, :mac, :pc, :pca then @doc.character_set = name
  when :fonttbl then current_pos = parse_font_table(src, current_pos)
  when :colortbl then current_pos = parse_colour_table(src, current_pos)
  when :stylesheet then current_pos = parse_stylesheet(src, current_pos)
  when :info then current_pos = parse_info(src, current_pos)
  when :* then current_pos = parse_skip(src, current_pos)

  when :f then add_section!(:font => @doc.font_table[val])

  # RTF font sizes are in half-points. divide by 2 to get points
  when :fs then add_section!(:font_size => (val.to_f / 2.0))
  when :b then add_section!(:bold => true)
  when :i then add_section!(:italic => true)
  when :ul then add_section!(:underline => true)
  when :super then add_section!(:superscript => true)
  when :sub then add_section!(:subscript => true)
  when :strike then add_section!(:strikethrough => true)
  when :scaps then add_section!(:smallcaps => true)
  when :ql then add_section!(:justification => :left)
  when :qr then add_section!(:justification => :right)
  when :qj then add_section!(:justification => :full)
  when :qc then add_section!(:justification => :center)
  when :fi then add_section!(:first_line_indent => RubyRTF.twips_to_points(val))
  when :li then add_section!(:left_indent => RubyRTF.twips_to_points(val))
  when :ri then add_section!(:right_indent => RubyRTF.twips_to_points(val))
  when :margl then add_section!(:left_margin => RubyRTF.twips_to_points(val))
  when :margr then add_section!(:right_margin => RubyRTF.twips_to_points(val))
  when :margt then add_section!(:top_margin => RubyRTF.twips_to_points(val))
  when :margb then add_section!(:bottom_margin => RubyRTF.twips_to_points(val))
  when :sb then add_section!(:space_before => RubyRTF.twips_to_points(val))
  when :sa then add_section!(:space_after => RubyRTF.twips_to_points(val))
  when :cf then add_section!(:foreground_colour => @doc.colour_table[val])
  when :cb then add_section!(:background_colour => @doc.colour_table[val])
  when :hex then current_section[:text] << val

  when :u
    # \uN carries the code point as a signed 16-bit *decimal* number, so
    # negative values wrap around by 65536. The character is built directly
    # from the code point instead of eval'ing an escape sequence.
    current_section[:text] << [val % 65_536].pack('U') unless val.nil?

  when :rquote, :lquote then add_modifier_section({name => true}, "'")
  when :rdblquote, :ldblquote then add_modifier_section({name => true}, '"')

  when :'{' then current_section[:text] << "{"
  when :'}' then current_section[:text] << "}"
  when :'\\' then current_section[:text] << '\\'

  when :~ then add_modifier_section({:nbsp => true}, " ")

  when :tab then add_modifier_section({:tab => true}, "\t")
  when :emdash then add_modifier_section({:emdash => true}, "--")
  when :endash then add_modifier_section({:endash => true}, "-")

  when :line, :"\n" then add_modifier_section({:newline => true}, "\n")
  when :"\r"
    # ignored

  when :par then add_modifier_section({:paragraph => true})
  when :pard, :plain then reset_current_section!

  when :trowd
    # A row definition either extends the table held by the last document
    # section or starts a brand new table.
    table = nil
    table = doc.sections.last[:modifiers][:table] if doc.sections.last && doc.sections.last[:modifiers][:table]
    if table
      table.add_row
    else
      table = RubyRTF::Table.new

      if !current_section[:text].empty?
        force_section!({:table => table})
      else
        current_section[:modifiers][:table] = table
        pop_formatting!
      end

      force_section!
      pop_formatting!
    end

    # text that follows is collected into the current cell of the row
    @context_stack.push(table.current_row.current_cell)

  when :trgaph
    raise "trgaph outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.half_gap = RubyRTF.twips_to_points(val)

  when :trleft
    raise "trleft outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.left_margin = RubyRTF.twips_to_points(val)

  when :cellx
    raise "cellx outside of a table?" if !current_context.respond_to?(:row)
    current_context.row.end_positions.push(RubyRTF.twips_to_points(val))

  when :intbl
    # nothing to do

  when :cell
    pop_formatting!

    # remember the table before the cell context is popped below
    table = current_context.table if current_context.respond_to?(:table)

    force_section! #unless current_section[:text].empty?
    reset_current_section!

    @context_stack.pop

    # only add a cell if the row isn't full already
    if table && table.current_row && (table.current_row.cells.length < table.current_row.end_positions.length)
      cell = table.current_row.add_cell
      @context_stack.push(cell)
    end

  when :row
    if current_context.sections.empty?
      # empty row
      table = current_context.table
      table.rows.pop

      @context_stack.pop
    end

  else
    # warn only the first time an unknown control is encountered
    unless @seen[name]
      @seen[name] = true
      STDERR.puts "Unknown control #{name.inspect} with #{val} at #{current_pos}"
    end
  end
  current_pos
end
253
+
254
# Parses the font table group
#
# Each font is stored in @doc.font_table under its number once its group
# closes (or when the table itself closes, for tables that list a font without
# nested braces). An empty table is accepted, and the scan stops at the end of
# the source if the table is never closed.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position: the index of the brace closing
#   the table, or a position at/after the end of src if it is unterminated
#
# @api private
def parse_font_table(src, current_pos)
  group = 1

  font = nil
  in_extra = nil

  while current_pos < src.length
    case src[current_pos]
    when '{'
      font = RubyRTF::Font.new if group == 1
      in_extra = nil

      group += 1

    when '}'
      group -= 1

      # font is still nil when the table contains no fonts at all
      if group <= 1 && font
        font.cleanup_names
        @doc.font_table[font.number] = font
      end

      in_extra = nil

      break if group == 0

    when '\\'
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      font = RubyRTF::Font.new if font.nil?

      case ctrl
      when :f then font.number = val
      when :fprq then font.pitch = val
      when :fcharset then font.character_set = val
      when :flomajor, :fhimajor, :fdbmajor, :fbimajor,
           :flominor, :fhiminor, :fdbminor, :fbiminor
        font.theme = ctrl.to_s[1..-1].to_sym

      when :falt, :fname, :panose then in_extra = ctrl
      else
        # family controls are the family name prefixed with "f"
        cmd = ctrl.to_s[1..-1].to_sym
        if RubyRTF::Font::FAMILIES.include?(cmd)
          font.family_command = cmd
        end
      end

      # need to next as parse_control will leave current_pos at the
      # next character already so current_pos += 1 below would move us too far
      next

    when "\r", "\n"
      # line breaks carry no meaning inside the table

    else
      # plain text belongs to whichever name is currently being collected
      font = RubyRTF::Font.new if font.nil?

      case in_extra
      when :falt then font.alternate_name << src[current_pos]
      when :panose then font.panose << src[current_pos]
      when :fname then font.non_tagged_name << src[current_pos]
      when nil then font.name << src[current_pos]
      end
    end
    current_pos += 1
  end

  current_pos
end
325
+
326
# Parses the colour table group
#
# A leading ';' registers a colour flagged as "use default". After that every
# ';' finishes the colour that was being described and appends it to
# @doc.colour_table. Characters that carry no meaning here (spaces, line
# breaks, ...) are stepped over, and the scan stops at the end of the source
# if the table is never closed.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position: the index of the brace closing
#   the table, or a position at/after the end of src if it is unterminated
#
# @api private
def parse_colour_table(src, current_pos)
  if src[current_pos] == ';'
    colour = RubyRTF::Colour.new
    colour.use_default = true

    @doc.colour_table << colour

    current_pos += 1
  end

  colour = RubyRTF::Colour.new

  while current_pos < src.length
    case src[current_pos]
    when '\\'
      # parse_control already leaves current_pos on the next character
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      case ctrl
      when :red then colour.red = val
      when :green then colour.green = val
      when :blue then colour.blue = val
      when :ctint then colour.tint = val
      when :cshade then colour.shade = val
      when :cmaindarkone, :cmainlightone, :cmaindarktwo, :cmainlighttwo, :caccentone,
           :caccenttwo, :caccentthree, :caccentfour, :caccentfive, :caccentsix,
           :chyperlink, :cfollowedhyperlink, :cbackgroundone, :ctextone,
           :cbackgroundtwo, :ctexttwo
        # theme controls are the theme name prefixed with "c"
        colour.theme = ctrl.to_s[1..-1].to_sym
      end

    when ';'
      @doc.colour_table << colour

      colour = RubyRTF::Colour.new
      current_pos += 1

    when '}'
      break

    else
      # always move forward so an unexpected character cannot stall the loop
      current_pos += 1
    end
  end

  current_pos
end
376
+
377
# Parses the stylesheet group
#
# The content is not interpreted; the whole group is stepped over.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_stylesheet(src, current_pos)
  skip_group(src, current_pos)
end

# Parses the info group
#
# The content is not interpreted; the whole group is stepped over.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_info(src, current_pos)
  skip_group(src, current_pos)
end

# Parses a comment group
#
# The content is not interpreted; the whole group is stepped over.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_skip(src, current_pos)
  skip_group(src, current_pos)
end

# Steps over the remainder of the group that is open at current_pos
#
# Nested groups are tracked so that only the brace closing the starting group
# ends the scan. A character following a backslash is never treated as a
# brace, which keeps the escapes \{ and \} from upsetting the nesting count.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position (inside the group)
# @return [Integer] The index of the closing brace of the group, or
#   src.length when the group is never closed
#
# @api private
def skip_group(src, current_pos)
  depth = 1
  limit = src.length

  while current_pos < limit
    case src[current_pos]
    when '\\'
      # jump over the escaped character as well
      current_pos += 1
    when '{'
      depth += 1
    when '}'
      depth -= 1
      break if depth == 0
    end
    current_pos += 1
  end

  # an escape at the very end may have stepped one past the end of the source
  [current_pos, limit].min
end
442
+
443
# Emits a stand-alone section carrying the given modifiers and text, then
# opens a fresh section after it. Each forced section is immediately followed
# by popping the formatting entry it pushed.
#
# @param mods [Hash] Modifiers for the stand-alone section
# @param text [String, nil] Text for the stand-alone section
# @return [Object] Whatever the final pop_formatting! call returns
def add_modifier_section(mods = {}, text = nil)
  [[mods, text], [{}, nil]].map { |section_mods, section_text|
    force_section!(section_mods, section_text)
    pop_formatting!
  }.last
end
450
+
451
# Applies modifiers to the section being built. While that section has no text
# yet the modifiers are merged into it in place; once it holds text a new
# section is forced instead.
#
# @param mods [Hash] The modifiers to apply
# @return [Object] The merged modifier Hash, or the result of force_section!
def add_section!(mods = {})
  return force_section!(mods) unless current_section[:text].empty?

  current_section[:modifiers].merge!(mods)
end
458
+
459
# Keys that aren't inherited
BLACKLISTED = [:paragraph, :newline, :tab, :lquote, :rquote, :ldblquote, :rdblquote].freeze

# Closes the section being built and starts a new one
#
# The section in progress is appended to the current context. The new section
# inherits every modifier of the top of the formatting stack except the
# BLACKLISTED ones; a modifier passed in explicitly always wins over the
# inherited value. The resulting modifiers are pushed onto the formatting
# stack.
#
# @param mods [Hash] Modifiers for the new section (completed in place with
#   the inherited values)
# @param text [String, nil] Initial text of the new section
# @return [Hash] The new current section
def force_section!(mods = {}, text = nil)
  current_context << @current_section

  formatting_stack.last.each_pair do |key, value|
    next if BLACKLISTED.include?(key)
    # never let inherited formatting clobber what was asked for
    next if mods.key?(key)
    mods[key] = value
  end
  formatting_stack.push(mods)

  @current_section = {:text => (text || ''), :modifiers => mods}
end
472
+
473
# Drops every modifier of the current section, returning it to default
# formatting. The modifier Hash is emptied in place.
#
# @return [Hash] The (now empty) modifier Hash of the current section
def reset_current_section!
  modifiers = current_section[:modifiers]
  modifiers.clear
end
479
+
480
# The container new sections are appended to: the top of the context stack,
# or the document itself when the stack is empty.
def current_context
  innermost = @context_stack.last
  innermost ? innermost : doc
end
483
+
484
# Removes the top element of the formatting stack.
# @note The bottom entry (the default formatting parameters) is never removed
#
# @return [Hash, nil] The removed entry, or nil when only the default remains
def pop_formatting!
  return if formatting_stack.length <= 1

  formatting_stack.pop
end
491
+ end
492
+ end