ruby-rtf 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.infinity_test +24 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +101 -0
- data/LICENSE +18 -0
- data/README +12 -0
- data/Rakefile +20 -0
- data/bin/rtf_parse +112 -0
- data/lib/ruby-rtf.rb +11 -0
- data/lib/ruby-rtf/colour.rb +50 -0
- data/lib/ruby-rtf/document.rb +36 -0
- data/lib/ruby-rtf/font.rb +83 -0
- data/lib/ruby-rtf/invalid_document.rb +4 -0
- data/lib/ruby-rtf/parser.rb +492 -0
- data/lib/ruby-rtf/ruby-rtf.rb +7 -0
- data/lib/ruby-rtf/table.rb +72 -0
- data/lib/ruby-rtf/version.rb +5 -0
- data/ruby-rtf.gemspec +30 -0
- data/spec/colour_spec.rb +12 -0
- data/spec/document_spec.rb +38 -0
- data/spec/font_spec.rb +15 -0
- data/spec/parser_spec.rb +926 -0
- data/spec/spec_helper.rb +1 -0
- metadata +130 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module RubyRTF
|
4
|
+
# Handles the parsing of RTF content into a RubyRTF::Document
|
5
|
+
class Parser
|
6
|
+
attr_accessor :current_section
|
7
|
+
|
8
|
+
# @return [Array] The current formatting block to use as the basis for new sections
|
9
|
+
attr_reader :formatting_stack
|
10
|
+
|
11
|
+
attr_reader :doc
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
default_mods = {}
|
15
|
+
@formatting_stack = [default_mods]
|
16
|
+
@current_section = {:text => '', :modifiers => default_mods}
|
17
|
+
|
18
|
+
@seen = {}
|
19
|
+
|
20
|
+
@doc = RubyRTF::Document.new
|
21
|
+
@context_stack = []
|
22
|
+
end
|
23
|
+
|
24
|
+
# Parses a given string into an RubyRTF::Document
|
25
|
+
#
|
26
|
+
# @param src [String] The document to parse
|
27
|
+
# @return [RubyRTF::Document] The RTF document representing the provided @doc
|
28
|
+
# @raise [RubyRTF::InvalidDocument] Raised if the document is not valid RTF
|
29
|
+
def parse(src)
|
30
|
+
raise RubyRTF::InvalidDocument.new("Opening \\rtf1 missing") unless src =~ /\{\\rtf1/
|
31
|
+
|
32
|
+
current_pos = 0
|
33
|
+
len = src.length
|
34
|
+
|
35
|
+
group_level = 0
|
36
|
+
while (current_pos < len)
|
37
|
+
char = src[current_pos]
|
38
|
+
current_pos += 1
|
39
|
+
|
40
|
+
case(char)
|
41
|
+
when '\\' then
|
42
|
+
name, val, current_pos = parse_control(src, current_pos)
|
43
|
+
current_pos = handle_control(name, val, src, current_pos)
|
44
|
+
|
45
|
+
when '{' then
|
46
|
+
add_section!
|
47
|
+
group_level += 1
|
48
|
+
|
49
|
+
when '}' then
|
50
|
+
pop_formatting!
|
51
|
+
add_section!
|
52
|
+
group_level -= 1
|
53
|
+
|
54
|
+
when *["\r", "\n"] then ;
|
55
|
+
else current_section[:text] << char
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
unless current_section[:text].empty?
|
60
|
+
current_context << current_section
|
61
|
+
end
|
62
|
+
|
63
|
+
raise RubyRTF::InvalidDocument.new("Unbalanced {}s") unless group_level == 0
|
64
|
+
@doc
|
65
|
+
end
|
66
|
+
|
67
|
+
STOP_CHARS = [' ', '\\', '{', '}', "\r", "\n", ';']
|
68
|
+
|
69
|
+
# Parses a control switch
|
70
|
+
#
|
71
|
+
# @param src [String] The fragment to parse
|
72
|
+
# @param current_pos [Integer] The position in string the control starts at (after the \)
|
73
|
+
# @return [String, String|Integer, Integer] The name, optional control value and the new current position
|
74
|
+
#
|
75
|
+
# @api private
|
76
|
+
def parse_control(src, current_pos = 0)
|
77
|
+
ctrl = ''
|
78
|
+
val = nil
|
79
|
+
|
80
|
+
max_len = src.length
|
81
|
+
start = current_pos
|
82
|
+
|
83
|
+
# handle hex special
|
84
|
+
if src[current_pos] == "'"
|
85
|
+
val = src[(current_pos + 1), 2].hex.chr
|
86
|
+
current_pos += 3
|
87
|
+
return [:hex, val, current_pos]
|
88
|
+
end
|
89
|
+
|
90
|
+
while (true)
|
91
|
+
break if current_pos >= max_len
|
92
|
+
break if STOP_CHARS.include?(src[current_pos])
|
93
|
+
|
94
|
+
current_pos += 1
|
95
|
+
end
|
96
|
+
return [src[current_pos].to_sym, nil, current_pos + 1] if start == current_pos
|
97
|
+
|
98
|
+
contents = src[start, current_pos - start]
|
99
|
+
m = contents.match(/([\*a-z]+)(\-?\d+)?\*?/)
|
100
|
+
ctrl = m[1].to_sym
|
101
|
+
val = m[2].to_i unless m[2].nil?
|
102
|
+
|
103
|
+
# we advance past the optional space if present
|
104
|
+
current_pos += 1 if src[current_pos] == ' '
|
105
|
+
|
106
|
+
[ctrl, val, current_pos]
|
107
|
+
end
|
108
|
+
|
109
|
+
# Handle a given control
|
110
|
+
#
|
111
|
+
# @param name [Symbol] The control name
|
112
|
+
# @param val [Integer|nil] The controls value, or nil if non associated
|
113
|
+
# @param src [String] The source document
|
114
|
+
# @param current_pos [Integer] The current document position
|
115
|
+
# @return [Integer] The new current position
|
116
|
+
#
|
117
|
+
# @api private
|
118
|
+
def handle_control(name, val, src, current_pos)
|
119
|
+
case(name)
|
120
|
+
when :rtf then ;
|
121
|
+
when :deff then @doc.default_font = val
|
122
|
+
when *[:ansi, :mac, :pc, :pca] then @doc.character_set = name
|
123
|
+
when :fonttbl then current_pos = parse_font_table(src, current_pos)
|
124
|
+
when :colortbl then current_pos = parse_colour_table(src, current_pos)
|
125
|
+
when :stylesheet then current_pos = parse_stylesheet(src, current_pos)
|
126
|
+
when :info then current_pos = parse_info(src, current_pos)
|
127
|
+
when :* then current_pos = parse_skip(src, current_pos)
|
128
|
+
|
129
|
+
when :f then add_section!(:font => @doc.font_table[val])
|
130
|
+
|
131
|
+
# RTF font sizes are in half-points. divide by 2 to get points
|
132
|
+
when :fs then add_section!(:font_size => (val.to_f / 2.0))
|
133
|
+
when :b then add_section!(:bold => true)
|
134
|
+
when :i then add_section!(:italic => true)
|
135
|
+
when :ul then add_section!(:underline => true)
|
136
|
+
when :super then add_section!(:superscript => true)
|
137
|
+
when :sub then add_section!(:subscript => true)
|
138
|
+
when :strike then add_section!(:strikethrough => true)
|
139
|
+
when :scaps then add_section!(:smallcaps => true)
|
140
|
+
when :ql then add_section!(:justification => :left)
|
141
|
+
when :qr then add_section!(:justification => :right)
|
142
|
+
when :qj then add_section!(:justification => :full)
|
143
|
+
when :qc then add_section!(:justification => :center)
|
144
|
+
when :fi then add_section!(:first_line_indent => RubyRTF.twips_to_points(val))
|
145
|
+
when :li then add_section!(:left_indent => RubyRTF.twips_to_points(val))
|
146
|
+
when :ri then add_section!(:right_indent => RubyRTF.twips_to_points(val))
|
147
|
+
when :margl then add_section!(:left_margin => RubyRTF.twips_to_points(val))
|
148
|
+
when :margr then add_section!(:right_margin => RubyRTF.twips_to_points(val))
|
149
|
+
when :margt then add_section!(:top_margin => RubyRTF.twips_to_points(val))
|
150
|
+
when :margb then add_section!(:bottom_margin => RubyRTF.twips_to_points(val))
|
151
|
+
when :sb then add_section!(:space_before => RubyRTF.twips_to_points(val))
|
152
|
+
when :sa then add_section!(:space_after => RubyRTF.twips_to_points(val))
|
153
|
+
when :cf then add_section!(:foreground_colour => @doc.colour_table[val])
|
154
|
+
when :cb then add_section!(:background_colour => @doc.colour_table[val])
|
155
|
+
when :hex then current_section[:text] << val
|
156
|
+
when :u then
|
157
|
+
char = if val > 0 && val < 10_000
|
158
|
+
'\u' + ("0" * (4 - val.to_s.length)) + val.to_s
|
159
|
+
elsif val > 0
|
160
|
+
'\u' + ("%04x" % val)
|
161
|
+
else
|
162
|
+
'\u' + ("%04x" % (val + 65_536))
|
163
|
+
end
|
164
|
+
current_section[:text] << eval("\"#{char}\"")
|
165
|
+
|
166
|
+
when *[:rquote, :lquote] then add_modifier_section({name => true}, "'")
|
167
|
+
when *[:rdblquote, :ldblquote] then add_modifier_section({name => true}, '"')
|
168
|
+
|
169
|
+
when :'{' then current_section[:text] << "{"
|
170
|
+
when :'}' then current_section[:text] << "}"
|
171
|
+
when :'\\' then current_section[:text] << '\\'
|
172
|
+
|
173
|
+
when :~ then add_modifier_section({:nbsp => true}, " ")
|
174
|
+
|
175
|
+
when :tab then add_modifier_section({:tab => true}, "\t")
|
176
|
+
when :emdash then add_modifier_section({:emdash => true}, "--")
|
177
|
+
when :endash then add_modifier_section({:endash => true}, "-")
|
178
|
+
|
179
|
+
when *[:line, :"\n"] then add_modifier_section({:newline => true}, "\n")
|
180
|
+
when :"\r" then ;
|
181
|
+
|
182
|
+
when :par then add_modifier_section({:paragraph => true})
|
183
|
+
when *[:pard, :plain] then reset_current_section!
|
184
|
+
|
185
|
+
when :trowd then
|
186
|
+
table = nil
|
187
|
+
table = doc.sections.last[:modifiers][:table] if doc.sections.last && doc.sections.last[:modifiers][:table]
|
188
|
+
if table
|
189
|
+
table.add_row
|
190
|
+
else
|
191
|
+
table = RubyRTF::Table.new
|
192
|
+
|
193
|
+
if !current_section[:text].empty?
|
194
|
+
force_section!({:table => table})
|
195
|
+
else
|
196
|
+
current_section[:modifiers][:table] = table
|
197
|
+
pop_formatting!
|
198
|
+
end
|
199
|
+
|
200
|
+
force_section!
|
201
|
+
pop_formatting!
|
202
|
+
end
|
203
|
+
|
204
|
+
@context_stack.push(table.current_row.current_cell)
|
205
|
+
|
206
|
+
when :trgaph then
|
207
|
+
raise "trgaph outside of a table?" if !current_context.respond_to?(:table)
|
208
|
+
current_context.table.half_gap = RubyRTF.twips_to_points(val)
|
209
|
+
|
210
|
+
when :trleft then
|
211
|
+
raise "trleft outside of a table?" if !current_context.respond_to?(:table)
|
212
|
+
current_context.table.left_margin = RubyRTF.twips_to_points(val)
|
213
|
+
|
214
|
+
when :cellx then
|
215
|
+
raise "cellx outside of a table?" if !current_context.respond_to?(:row)
|
216
|
+
current_context.row.end_positions.push(RubyRTF.twips_to_points(val))
|
217
|
+
|
218
|
+
when :intbl then ;
|
219
|
+
|
220
|
+
when :cell then
|
221
|
+
pop_formatting!
|
222
|
+
|
223
|
+
table = current_context.table if current_context.respond_to?(:table)
|
224
|
+
|
225
|
+
force_section! #unless current_section[:text].empty?
|
226
|
+
reset_current_section!
|
227
|
+
|
228
|
+
@context_stack.pop
|
229
|
+
|
230
|
+
# only add a cell if the row isn't full already
|
231
|
+
if table && table.current_row && (table.current_row.cells.length < table.current_row.end_positions.length)
|
232
|
+
cell = table.current_row.add_cell
|
233
|
+
@context_stack.push(cell)
|
234
|
+
end
|
235
|
+
|
236
|
+
when :row then
|
237
|
+
if current_context.sections.empty?
|
238
|
+
# empty row
|
239
|
+
table = current_context.table
|
240
|
+
table.rows.pop
|
241
|
+
|
242
|
+
@context_stack.pop
|
243
|
+
end
|
244
|
+
|
245
|
+
else
|
246
|
+
unless @seen[name]
|
247
|
+
@seen[name] = true
|
248
|
+
STDERR.puts "Unknown control #{name.inspect} with #{val} at #{current_pos}"
|
249
|
+
end
|
250
|
+
end
|
251
|
+
current_pos
|
252
|
+
end
|
253
|
+
|
254
|
+
# Parses the font table group
|
255
|
+
#
|
256
|
+
# @param src [String] The source document
|
257
|
+
# @param current_pos [Integer] The starting position
|
258
|
+
# @return [Integer] The new current position
|
259
|
+
#
|
260
|
+
# @api private
|
261
|
+
def parse_font_table(src, current_pos)
|
262
|
+
group = 1
|
263
|
+
|
264
|
+
font = nil
|
265
|
+
in_extra = nil
|
266
|
+
|
267
|
+
while (true)
|
268
|
+
case(src[current_pos])
|
269
|
+
when '{' then
|
270
|
+
font = RubyRTF::Font.new if group == 1
|
271
|
+
in_extra = nil
|
272
|
+
|
273
|
+
group += 1
|
274
|
+
|
275
|
+
when '}' then
|
276
|
+
group -= 1
|
277
|
+
|
278
|
+
if group <= 1
|
279
|
+
font.cleanup_names
|
280
|
+
@doc.font_table[font.number] = font
|
281
|
+
end
|
282
|
+
|
283
|
+
in_extra = nil
|
284
|
+
|
285
|
+
break if group == 0
|
286
|
+
|
287
|
+
when '\\' then
|
288
|
+
ctrl, val, current_pos = parse_control(src, current_pos + 1)
|
289
|
+
|
290
|
+
font = RubyRTF::Font.new if font.nil?
|
291
|
+
|
292
|
+
case(ctrl)
|
293
|
+
when :f then font.number = val
|
294
|
+
when :fprq then font.pitch = val
|
295
|
+
when :fcharset then font.character_set = val
|
296
|
+
when *[:flomajor, :fhimajor, :fdbmajor, :fbimajor,
|
297
|
+
:flominor, :fhiminor, :fdbminor, :fbiminor] then
|
298
|
+
font.theme = ctrl.to_s[1..-1].to_sym
|
299
|
+
|
300
|
+
when *[:falt, :fname, :panose] then in_extra = ctrl
|
301
|
+
else
|
302
|
+
cmd = ctrl.to_s[1..-1].to_sym
|
303
|
+
if RubyRTF::Font::FAMILIES.include?(cmd)
|
304
|
+
font.family_command = cmd
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
# need to next as parse_control will leave current_pos at the
|
309
|
+
# next character already so current_pos += 1 below would move us too far
|
310
|
+
next
|
311
|
+
when *["\r", "\n"] then ;
|
312
|
+
else
|
313
|
+
case(in_extra)
|
314
|
+
when :falt then font.alternate_name << src[current_pos]
|
315
|
+
when :panose then font.panose << src[current_pos]
|
316
|
+
when :fname then font.non_tagged_name << src[current_pos]
|
317
|
+
when nil then font.name << src[current_pos]
|
318
|
+
end
|
319
|
+
end
|
320
|
+
current_pos += 1
|
321
|
+
end
|
322
|
+
|
323
|
+
current_pos
|
324
|
+
end
|
325
|
+
|
326
|
+
# Parses the colour table group
|
327
|
+
#
|
328
|
+
# @param src [String] The source document
|
329
|
+
# @param current_pos [Integer] The starting position
|
330
|
+
# @return [Integer] The new current position
|
331
|
+
#
|
332
|
+
# @api private
|
333
|
+
def parse_colour_table(src, current_pos)
|
334
|
+
if src[current_pos] == ';'
|
335
|
+
colour = RubyRTF::Colour.new
|
336
|
+
colour.use_default = true
|
337
|
+
|
338
|
+
@doc.colour_table << colour
|
339
|
+
|
340
|
+
current_pos += 1
|
341
|
+
end
|
342
|
+
|
343
|
+
colour = RubyRTF::Colour.new
|
344
|
+
|
345
|
+
while (true)
|
346
|
+
case(src[current_pos])
|
347
|
+
when '\\' then
|
348
|
+
ctrl, val, current_pos = parse_control(src, current_pos + 1)
|
349
|
+
|
350
|
+
case(ctrl)
|
351
|
+
when :red then colour.red = val
|
352
|
+
when :green then colour.green = val
|
353
|
+
when :blue then colour.blue = val
|
354
|
+
when :ctint then colour.tint = val
|
355
|
+
when :cshade then colour.shade = val
|
356
|
+
when *[:cmaindarkone, :cmainlightone, :cmaindarktwo, :cmainlighttwo, :caccentone,
|
357
|
+
:caccenttwo, :caccentthree, :caccentfour, :caccentfive, :caccentsix,
|
358
|
+
:chyperlink, :cfollowedhyperlink, :cbackgroundone, :ctextone,
|
359
|
+
:cbackgroundtwo, :ctexttwo] then
|
360
|
+
colour.theme = ctrl.to_s[1..-1].to_sym
|
361
|
+
end
|
362
|
+
|
363
|
+
when *["\r", "\n"] then current_pos += 1
|
364
|
+
when ';' then
|
365
|
+
@doc.colour_table << colour
|
366
|
+
|
367
|
+
colour = RubyRTF::Colour.new
|
368
|
+
current_pos += 1
|
369
|
+
|
370
|
+
when '}' then break
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
current_pos
|
375
|
+
end
|
376
|
+
|
377
|
+
# Parses the stylesheet group
|
378
|
+
#
|
379
|
+
# @param src [String] The source document
|
380
|
+
# @param current_pos [Integer] The starting position
|
381
|
+
# @return [Integer] The new current position
|
382
|
+
#
|
383
|
+
# @api private
|
384
|
+
def parse_stylesheet(src, current_pos)
|
385
|
+
group = 1
|
386
|
+
while (true)
|
387
|
+
case(src[current_pos])
|
388
|
+
when '{' then group += 1
|
389
|
+
when '}' then
|
390
|
+
group -= 1
|
391
|
+
break if group == 0
|
392
|
+
end
|
393
|
+
current_pos += 1
|
394
|
+
end
|
395
|
+
|
396
|
+
current_pos
|
397
|
+
end
|
398
|
+
|
399
|
+
# Parses the info group
|
400
|
+
#
|
401
|
+
# @param src [String] The source document
|
402
|
+
# @param current_pos [Integer] The starting position
|
403
|
+
# @return [Integer] The new current position
|
404
|
+
#
|
405
|
+
# @api private
|
406
|
+
def parse_info(src, current_pos)
|
407
|
+
group = 1
|
408
|
+
while (true)
|
409
|
+
case(src[current_pos])
|
410
|
+
when '{' then group += 1
|
411
|
+
when '}' then
|
412
|
+
group -= 1
|
413
|
+
break if group == 0
|
414
|
+
end
|
415
|
+
current_pos += 1
|
416
|
+
end
|
417
|
+
|
418
|
+
current_pos
|
419
|
+
end
|
420
|
+
|
421
|
+
# Parses a comment group
|
422
|
+
#
|
423
|
+
# @param src [String] The source document
|
424
|
+
# @param current_pos [Integer] The starting position
|
425
|
+
# @return [Integer] The new current position
|
426
|
+
#
|
427
|
+
# @api private
|
428
|
+
def parse_skip(src, current_pos)
|
429
|
+
group = 1
|
430
|
+
while (true)
|
431
|
+
case(src[current_pos])
|
432
|
+
when '{' then group += 1
|
433
|
+
when '}' then
|
434
|
+
group -= 1
|
435
|
+
break if group == 0
|
436
|
+
end
|
437
|
+
current_pos += 1
|
438
|
+
end
|
439
|
+
|
440
|
+
current_pos
|
441
|
+
end
|
442
|
+
|
443
|
+
def add_modifier_section(mods = {}, text = nil)
|
444
|
+
force_section!(mods, text)
|
445
|
+
pop_formatting!
|
446
|
+
|
447
|
+
force_section!
|
448
|
+
pop_formatting!
|
449
|
+
end
|
450
|
+
|
451
|
+
def add_section!(mods = {})
|
452
|
+
if current_section[:text].empty?
|
453
|
+
current_section[:modifiers].merge!(mods)
|
454
|
+
else
|
455
|
+
force_section!(mods)
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
459
|
+
# Keys that aren't inherited
|
460
|
+
BLACKLISTED = [:paragraph, :newline, :tab, :lquote, :rquote, :ldblquote, :rdblquote]
|
461
|
+
def force_section!(mods = {}, text = nil)
|
462
|
+
current_context << @current_section
|
463
|
+
|
464
|
+
formatting_stack.last.each_pair do |k, v|
|
465
|
+
next if BLACKLISTED.include?(k)
|
466
|
+
mods[k] = v
|
467
|
+
end
|
468
|
+
formatting_stack.push(mods)
|
469
|
+
|
470
|
+
@current_section = {:text => (text || ''), :modifiers => mods}
|
471
|
+
end
|
472
|
+
|
473
|
+
# Resets the current section to default formating
|
474
|
+
#
|
475
|
+
# @return [Nil]
|
476
|
+
def reset_current_section!
|
477
|
+
current_section[:modifiers].clear
|
478
|
+
end
|
479
|
+
|
480
|
+
def current_context
|
481
|
+
@context_stack.last || doc
|
482
|
+
end
|
483
|
+
|
484
|
+
# Pop the current top element off the formatting stack.
|
485
|
+
# @note This will not allow you to remove the defualt formatting parameters
|
486
|
+
#
|
487
|
+
# @return [Nil]
|
488
|
+
def pop_formatting!
|
489
|
+
formatting_stack.pop if formatting_stack.length > 1
|
490
|
+
end
|
491
|
+
end
|
492
|
+
end
|