ruby-rtf 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.infinity_test +24 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +101 -0
- data/LICENSE +18 -0
- data/README +12 -0
- data/Rakefile +20 -0
- data/bin/rtf_parse +112 -0
- data/lib/ruby-rtf.rb +11 -0
- data/lib/ruby-rtf/colour.rb +50 -0
- data/lib/ruby-rtf/document.rb +36 -0
- data/lib/ruby-rtf/font.rb +83 -0
- data/lib/ruby-rtf/invalid_document.rb +4 -0
- data/lib/ruby-rtf/parser.rb +492 -0
- data/lib/ruby-rtf/ruby-rtf.rb +7 -0
- data/lib/ruby-rtf/table.rb +72 -0
- data/lib/ruby-rtf/version.rb +5 -0
- data/ruby-rtf.gemspec +30 -0
- data/spec/colour_spec.rb +12 -0
- data/spec/document_spec.rb +38 -0
- data/spec/font_spec.rb +15 -0
- data/spec/parser_spec.rb +926 -0
- data/spec/spec_helper.rb +1 -0
- metadata +130 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module RubyRTF
|
4
|
+
# Handles the parsing of RTF content into a RubyRTF::Document
|
5
|
+
class Parser
|
6
|
+
# @return [Hash] The section currently being built, holding a :text string
#   and a :modifiers hash
attr_accessor :current_section

# formatting_stack:
#   @return [Array] The current formatting block to use as the basis for new sections
# doc:
#   @return [RubyRTF::Document] The document the parser fills in
attr_reader :formatting_stack, :doc
|
12
|
+
|
13
|
+
# Builds a parser holding an empty RubyRTF::Document and fresh bookkeeping state
def initialize
  @doc = RubyRTF::Document.new

  # Contexts pushed while inside table cells; the document itself is used
  # whenever this stack is empty (see #current_context)
  @context_stack = []

  # Names of unknown controls that have already been reported on STDERR
  @seen = {}

  # The first section and the bottom entry of the formatting stack share
  # one and the same modifiers hash object
  base_modifiers = {}
  @current_section = {:text => '', :modifiers => base_modifiers}
  @formatting_stack = [base_modifiers]
end
|
23
|
+
|
24
|
+
# Parses a given string into a RubyRTF::Document
#
# Walks the source one character at a time: backslashes start a control
# (handed to parse_control / handle_control), braces open and close groups,
# line breaks are ignored and everything else is appended to the text of
# the current section.
#
# @param src [String] The document to parse
# @return [RubyRTF::Document] The RTF document representing the provided @doc
# @raise [RubyRTF::InvalidDocument] Raised if the document is not valid RTF
def parse(src)
  unless src =~ /\{\\rtf1/
    raise RubyRTF::InvalidDocument.new("Opening \\rtf1 missing")
  end

  depth = 0
  pos = 0
  total = src.length

  until pos >= total
    char = src[pos]
    pos += 1

    if char == '\\'
      # parse_control and handle_control may both move the position forward
      name, val, pos = parse_control(src, pos)
      pos = handle_control(name, val, src, pos)
    elsif char == '{'
      add_section!
      depth += 1
    elsif char == '}'
      pop_formatting!
      add_section!
      depth -= 1
    elsif char != "\r" && char != "\n"
      current_section[:text] << char
    end
  end

  # flush whatever text is still pending in the last section
  current_context << current_section unless current_section[:text].empty?

  raise RubyRTF::InvalidDocument.new("Unbalanced {}s") if depth != 0
  @doc
end
|
66
|
+
|
67
|
+
# Characters that end the scan for the extent of a control word
STOP_CHARS = [' ', '\\', '{', '}', "\r", "\n", ';'].freeze

# Parses a control switch
#
# Three shapes are recognised:
# * \'hh     - a hex escaped character, returned as [:hex, char, pos]
# * a symbol - a single character outside of a-z and *, e.g. \{ \} \\ \~ \-
#              returned as [:"<char>", nil, pos]
# * a word   - lowercase letters (or *) with an optional, possibly negative,
#              numeric value, e.g. \b0 or \fi-200; one trailing space is consumed
#
# NOTE(review): the end of a control word is only detected at one of
# STOP_CHARS, so any other characters that directly follow the word and its
# number (for example the '?' fallback after \uN) are consumed and dropped
# together with the word.
#
# @param src [String] The fragment to parse
# @param current_pos [Integer] The position in string the control starts at (after the \)
# @return [String, String|Integer, Integer] The name, optional control value and the new current position
# @raise [RubyRTF::InvalidDocument] Raised if the source ends right after the \
#
# @api private
def parse_control(src, current_pos = 0)
  first = src[current_pos]
  if first.nil?
    raise RubyRTF::InvalidDocument.new("Control expected after \\ at end of document")
  end

  # handle hex special: the quote plus two hex digits are consumed
  if first == "'"
    val = src[(current_pos + 1), 2].hex.chr
    return [:hex, val, current_pos + 3]
  end

  # Control symbols are exactly one character long. Without this check a
  # symbol such as \~ either failed the word regex below (nil match) or was
  # silently swallowed into the letters following it.
  unless first =~ /[\*a-z]/
    return [first.to_sym, nil, current_pos + 1]
  end

  start = current_pos
  max_len = src.length
  current_pos += 1 while current_pos < max_len && !STOP_CHARS.include?(src[current_pos])

  # The first character is within [\*a-z], so this match cannot be nil
  contents = src[start, current_pos - start]
  m = contents.match(/([\*a-z]+)(\-?\d+)?\*?/)
  ctrl = m[1].to_sym
  val = m[2].nil? ? nil : m[2].to_i

  # we advance past the optional space if present
  current_pos += 1 if src[current_pos] == ' '

  [ctrl, val, current_pos]
end
|
108
|
+
|
109
|
+
# Handle a given control
#
# Dispatches on the control name: document level settings are stored on the
# document, table/info/stylesheet style groups are handed to their dedicated
# parsers, formatting controls open or amend sections and special character
# controls append text. Controls that are not recognised are reported once
# per name on STDERR and otherwise ignored.
#
# @param name [Symbol] The control name
# @param val [Integer|nil] The controls value, or nil if non associated
# @param src [String] The source document
# @param current_pos [Integer] The current document position
# @return [Integer] The new current position
#
# @api private
def handle_control(name, val, src, current_pos)
  case name
  when :rtf then ;
  when :deff then @doc.default_font = val
  when :ansi, :mac, :pc, :pca then @doc.character_set = name
  when :fonttbl then current_pos = parse_font_table(src, current_pos)
  when :colortbl then current_pos = parse_colour_table(src, current_pos)
  when :stylesheet then current_pos = parse_stylesheet(src, current_pos)
  when :info then current_pos = parse_info(src, current_pos)
  when :* then current_pos = parse_skip(src, current_pos)

  when :f then add_section!(:font => @doc.font_table[val])

  # RTF font sizes are in half-points. divide by 2 to get points
  when :fs then add_section!(:font_size => (val.to_f / 2.0))

  # Toggle controls: a value of 0 (e.g. \b0) switches the property off,
  # a missing or any other value switches it on
  when :b then add_section!(:bold => (val != 0))
  when :i then add_section!(:italic => (val != 0))
  when :ul then add_section!(:underline => (val != 0))
  when :strike then add_section!(:strikethrough => (val != 0))
  when :scaps then add_section!(:smallcaps => (val != 0))

  when :super then add_section!(:superscript => true)
  when :sub then add_section!(:subscript => true)
  when :ql then add_section!(:justification => :left)
  when :qr then add_section!(:justification => :right)
  when :qj then add_section!(:justification => :full)
  when :qc then add_section!(:justification => :center)
  when :fi then add_section!(:first_line_indent => RubyRTF.twips_to_points(val))
  when :li then add_section!(:left_indent => RubyRTF.twips_to_points(val))
  when :ri then add_section!(:right_indent => RubyRTF.twips_to_points(val))
  when :margl then add_section!(:left_margin => RubyRTF.twips_to_points(val))
  when :margr then add_section!(:right_margin => RubyRTF.twips_to_points(val))
  when :margt then add_section!(:top_margin => RubyRTF.twips_to_points(val))
  when :margb then add_section!(:bottom_margin => RubyRTF.twips_to_points(val))
  when :sb then add_section!(:space_before => RubyRTF.twips_to_points(val))
  when :sa then add_section!(:space_after => RubyRTF.twips_to_points(val))
  when :cf then add_section!(:foreground_colour => @doc.colour_table[val])
  when :cb then add_section!(:background_colour => @doc.colour_table[val])
  when :hex then current_section[:text] << val

  when :u then
    # \uN holds the code point as a signed 16-bit *decimal* number, so
    # negative values wrap around by 65536. The number must not be read as
    # hex digits (\u8217 is U+2019, not U+8217).
    # NOTE(review): surrogate halves sent as two separate \u controls are
    # appended one by one and not combined into a single character.
    unless val.nil?
      code = val < 0 ? val % 65_536 : val
      current_section[:text] << [code].pack('U')
    end

  when :rquote, :lquote then add_modifier_section({name => true}, "'")
  when :rdblquote, :ldblquote then add_modifier_section({name => true}, '"')

  when :'{' then current_section[:text] << "{"
  when :'}' then current_section[:text] << "}"
  when :'\\' then current_section[:text] << '\\'

  when :~ then add_modifier_section({:nbsp => true}, " ")

  when :tab then add_modifier_section({:tab => true}, "\t")
  when :emdash then add_modifier_section({:emdash => true}, "--")
  when :endash then add_modifier_section({:endash => true}, "-")

  when :line, :"\n" then add_modifier_section({:newline => true}, "\n")
  when :"\r" then ;

  when :par then add_modifier_section({:paragraph => true})
  when :pard, :plain then reset_current_section!

  when :trowd then
    # Continue the table of the last document section if it has one,
    # otherwise start a new table
    last_section = doc.sections.last
    table = nil
    table = last_section[:modifiers][:table] if last_section && last_section[:modifiers][:table]

    if table
      table.add_row
    else
      table = RubyRTF::Table.new

      if !current_section[:text].empty?
        force_section!({:table => table})
      else
        current_section[:modifiers][:table] = table
        pop_formatting!
      end

      force_section!
      pop_formatting!
    end

    # sections are collected by the current cell until \cell is seen
    @context_stack.push(table.current_row.current_cell)

  when :trgaph then
    raise "trgaph outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.half_gap = RubyRTF.twips_to_points(val)

  when :trleft then
    raise "trleft outside of a table?" if !current_context.respond_to?(:table)
    current_context.table.left_margin = RubyRTF.twips_to_points(val)

  when :cellx then
    raise "cellx outside of a table?" if !current_context.respond_to?(:row)
    current_context.row.end_positions.push(RubyRTF.twips_to_points(val))

  when :intbl then ;

  when :cell then
    pop_formatting!

    # remember the table before the cell context is popped below
    table = current_context.table if current_context.respond_to?(:table)

    force_section! #unless current_section[:text].empty?
    reset_current_section!

    @context_stack.pop

    # only add a cell if the row isn't full already
    if table && table.current_row && (table.current_row.cells.length < table.current_row.end_positions.length)
      cell = table.current_row.add_cell
      @context_stack.push(cell)
    end

  when :row then
    if current_context.sections.empty?
      # empty row
      table = current_context.table
      table.rows.pop

      @context_stack.pop
    end

  else
    # report every unknown control only once
    unless @seen[name]
      @seen[name] = true
      STDERR.puts "Unknown control #{name.inspect} with #{val} at #{current_pos}"
    end
  end

  current_pos
end
|
253
|
+
|
254
|
+
# Parses the font table group
#
# Every font definition is collected into a RubyRTF::Font and stored in the
# documents font table under its font number once its group closes. Parsing
# stops at the } that closes the font table itself (that position is
# returned, the brace is left for the caller) or at the end of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_font_table(src, current_pos)
  group = 1

  font = nil
  in_extra = nil

  while true
    char = src[current_pos]

    # ran out of input before the table was closed
    break if char.nil?

    case char
    when '{' then
      font = RubyRTF::Font.new if group == 1
      in_extra = nil

      group += 1

    when '}' then
      group -= 1

      # A font is finished when its own group closes, or when the table
      # closes while a font that was not wrapped in a group is pending.
      # The font is dropped afterwards so that it is neither stored twice
      # nor picks up characters found between font groups.
      if group <= 1 && font
        font.cleanup_names
        @doc.font_table[font.number] = font
        font = nil
      end

      in_extra = nil

      break if group == 0

    when '\\' then
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      font = RubyRTF::Font.new if font.nil?

      case ctrl
      when :f then font.number = val
      when :fprq then font.pitch = val
      when :fcharset then font.character_set = val
      when :flomajor, :fhimajor, :fdbmajor, :fbimajor,
           :flominor, :fhiminor, :fdbminor, :fbiminor then
        font.theme = ctrl.to_s[1..-1].to_sym

      when :falt, :fname, :panose then in_extra = ctrl
      else
        # family controls are the family name prefixed with an f
        cmd = ctrl.to_s[1..-1].to_sym
        if RubyRTF::Font::FAMILIES.include?(cmd)
          font.family_command = cmd
        end
      end

      # need to next as parse_control will leave current_pos at the
      # next character already so current_pos += 1 below would move us too far
      next

    when "\r", "\n" then ;
    else
      # plain characters outside of any font definition are ignored
      unless font.nil?
        case in_extra
        when :falt then font.alternate_name << char
        when :panose then font.panose << char
        when :fname then font.non_tagged_name << char
        when nil then font.name << char
        end
      end
    end

    current_pos += 1
  end

  current_pos
end
|
325
|
+
|
326
|
+
# Parses the colour table group
#
# A leading ; adds a colour flagged as "use default". After that every ;
# finishes the colour built from the preceding controls and appends it to
# the documents colour table. Parsing stops at the closing } (that position
# is returned, the brace is left for the caller) or at the end of the source.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_colour_table(src, current_pos)
  if src[current_pos] == ';'
    colour = RubyRTF::Colour.new
    colour.use_default = true

    @doc.colour_table << colour

    current_pos += 1
  end

  colour = RubyRTF::Colour.new

  while true
    case src[current_pos]
    # nil means the source ended before the table was closed
    when nil, '}' then break

    when '\\' then
      # parse_control already leaves current_pos behind the control
      ctrl, val, current_pos = parse_control(src, current_pos + 1)

      case ctrl
      when :red then colour.red = val
      when :green then colour.green = val
      when :blue then colour.blue = val
      when :ctint then colour.tint = val
      when :cshade then colour.shade = val
      when :cmaindarkone, :cmainlightone, :cmaindarktwo, :cmainlighttwo, :caccentone,
           :caccenttwo, :caccentthree, :caccentfour, :caccentfive, :caccentsix,
           :chyperlink, :cfollowedhyperlink, :cbackgroundone, :ctextone,
           :cbackgroundtwo, :ctexttwo then
        colour.theme = ctrl.to_s[1..-1].to_sym
      end

    when ';' then
      @doc.colour_table << colour

      colour = RubyRTF::Colour.new
      current_pos += 1

    else
      # Line breaks and any other stray character (e.g. a space) carry no
      # colour data. They must still be stepped over, otherwise the loop
      # would never advance.
      current_pos += 1
    end
  end

  current_pos
end
|
376
|
+
|
377
|
+
# Parses the stylesheet group
#
# The content is skipped: only nested groups are tracked in order to find
# the } that closes the stylesheet. That position is returned, the brace
# itself is left for the caller. If the source ends first, the length of
# the source is returned.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_stylesheet(src, current_pos)
  group = 1
  len = src.length

  # bounded by the source length so an unterminated group cannot loop forever
  while current_pos < len
    case src[current_pos]
    # the character behind a backslash is never a group delimiter (\{ \} \\)
    when '\\' then current_pos += 1
    when '{' then group += 1
    when '}' then
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have stepped one position past the end
  [current_pos, len].min
end
|
398
|
+
|
399
|
+
# Parses the info group
#
# The content is skipped: only nested groups are tracked in order to find
# the } that closes the info group. That position is returned, the brace
# itself is left for the caller. If the source ends first, the length of
# the source is returned.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_info(src, current_pos)
  group = 1
  len = src.length

  # bounded by the source length so an unterminated group cannot loop forever
  while current_pos < len
    case src[current_pos]
    # the character behind a backslash is never a group delimiter (\{ \} \\)
    when '\\' then current_pos += 1
    when '{' then group += 1
    when '}' then
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have stepped one position past the end
  [current_pos, len].min
end
|
420
|
+
|
421
|
+
# Parses a comment group
#
# The content is skipped: only nested groups are tracked in order to find
# the } that closes the group. That position is returned, the brace itself
# is left for the caller. If the source ends first, the length of the
# source is returned.
#
# @param src [String] The source document
# @param current_pos [Integer] The starting position
# @return [Integer] The new current position
#
# @api private
def parse_skip(src, current_pos)
  group = 1
  len = src.length

  # bounded by the source length so an unterminated group cannot loop forever
  while current_pos < len
    case src[current_pos]
    # the character behind a backslash is never a group delimiter (\{ \} \\)
    when '\\' then current_pos += 1
    when '{' then group += 1
    when '}' then
      group -= 1
      break if group == 0
    end
    current_pos += 1
  end

  # a trailing backslash may have stepped one position past the end
  [current_pos, len].min
end
|
442
|
+
|
443
|
+
# Adds a stand-alone section for a special marker (tab, newline, quote, ...)
# and follows it with a fresh, empty section
#
# @param mods [Hash] The modifiers describing the marker
# @param text [String|nil] The text the marker section carries, if any
# @return [Hash|nil] Whatever the final pop_formatting! returns
def add_modifier_section(mods = {}, text = nil)
  # the marker section itself; its formatting is not kept on the stack
  force_section!(mods, text)
  pop_formatting!

  # the section that receives the text following the marker
  force_section!({}, nil)
  pop_formatting!
end
|
450
|
+
|
451
|
+
# Applies modifiers to the text that follows
#
# While the current section has no text yet the modifiers are merged into
# it, otherwise a new section is started via force_section!
#
# @param mods [Hash] The modifiers to apply
# @return [Hash] The merged modifiers, or the result of force_section!
def add_section!(mods = {})
  return force_section!(mods) unless current_section[:text].empty?

  current_section[:modifiers].merge!(mods)
end
|
458
|
+
|
459
|
+
# Keys that aren't inherited
BLACKLISTED = [:paragraph, :newline, :tab, :lquote, :rquote, :ldblquote, :rdblquote].freeze

# Closes the current section and starts a new one
#
# The current section is appended to the current context. The new section
# inherits every modifier from the top of the formatting stack, except the
# BLACKLISTED ones and those explicitly given in mods: an explicit modifier
# always wins over the inherited value. The given mods hash itself is
# extended, pushed on the formatting stack and used by the new section.
#
# @param mods [Hash] The modifiers requested for the new section
# @param text [String|nil] The initial text of the new section
# @return [Hash] The new current section
def force_section!(mods = {}, text = nil)
  current_context << @current_section

  formatting_stack.last.each_pair do |key, value|
    next if BLACKLISTED.include?(key)

    # never overwrite what the caller asked for, e.g. a new font size
    next if mods.key?(key)

    mods[key] = value
  end
  formatting_stack.push(mods)

  @current_section = {:text => (text || ''), :modifiers => mods}
end
|
472
|
+
|
473
|
+
# Resets the current section to default formatting
#
# The modifiers hash of the current section is emptied in place.
#
# @return [Hash] The emptied modifiers hash
def reset_current_section!
  modifiers = current_section[:modifiers]
  modifiers.clear
end
|
479
|
+
|
480
|
+
# The object new sections are appended to
#
# @return [Object] The top of the context stack, or the document when the
#   stack is empty
def current_context
  innermost = @context_stack.last
  innermost ? innermost : doc
end
|
483
|
+
|
484
|
+
# Pop the current top element off the formatting stack.
# @note This will not allow you to remove the default formatting parameters
#
# @return [Hash|nil] The removed entry, or nil when only the default entry is left
def pop_formatting!
  return nil if formatting_stack.length <= 1

  formatting_stack.pop
end
|
491
|
+
end
|
492
|
+
end
|