rbbcode 0.1.11 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,430 +0,0 @@
1
- require 'pp'
2
-
3
- module RbbCode
4
- module CharCodes
5
- CR_CODE = 13
6
- LF_CODE = 10
7
-
8
- L_BRACK_CODE = 91
9
- R_BRACK_CODE = 93
10
- SLASH_CODE = 47
11
-
12
- LOWER_A_CODE = 97
13
- LOWER_Z_CODE = 122
14
-
15
- UPPER_A_CODE = 65
16
- UPPER_Z_CODE = 90
17
- end
18
-
19
- class Node
20
- def << (child)
21
- @children << child
22
- end
23
-
24
- attr_accessor :children
25
-
26
- def initialize(parent)
27
- @parent = parent
28
- @children = []
29
- end
30
-
31
- attr_accessor :parent
32
- end
33
-
34
- class RootNode < Node
35
- def initialize
36
- @children = []
37
- end
38
- end
39
-
40
- class TextNode < Node
41
-
42
- undef_method '<<'.to_sym
43
- undef_method :children
44
-
45
- def initialize(parent, text)
46
- @parent = parent
47
- @text = text
48
- end
49
-
50
- attr_accessor :text
51
-
52
- def to_bb_code
53
- @text
54
- end
55
- end
56
-
57
- class TagNode < Node
58
- def self.from_opening_bb_code(parent, bb_code)
59
- # Remove colon if leave_tag
60
- if bb_code[1,1] == ':'
61
- bb_code = "[#{bb_code[2..-1]}"
62
- end
63
-
64
- if equal_index = bb_code.index('=')
65
- tag_name = bb_code[1, equal_index - 1]
66
- value = bb_code[(equal_index + 1)..-2]
67
- else
68
- tag_name = bb_code[1..-2]
69
- value = nil
70
- end
71
- new(parent, tag_name, value)
72
- end
73
-
74
- def initialize(parent, tag_name, value = nil)
75
- super(parent)
76
- @tag_name = tag_name
77
- @value = value
78
- @preformatted = false
79
- end
80
-
81
- def inner_bb_code
82
- @children.inject('') do |output, child|
83
- output << child.to_bb_code
84
- end
85
- end
86
-
87
- def preformat!
88
- @preformatted = true
89
- end
90
-
91
- def preformatted?
92
- @preformatted
93
- end
94
-
95
- def to_bb_code
96
- if @value.nil?
97
- output = "[#{@tag_name}]"
98
- else
99
- output = "[#{@tag_name}=#{@value}]"
100
- end
101
- output << inner_bb_code << "[/#{@tag_name}]"
102
- end
103
-
104
- attr_reader :tag_name
105
-
106
- attr_reader :value
107
- end
108
-
109
- class TreeMaker
110
- include CharCodes
111
-
112
- def initialize(schema)
113
- @schema = schema
114
- end
115
-
116
- def make_tree(str)
117
- delete_junk_breaks!(
118
- delete_invalid_empty_tags!(
119
- parse_str(str)
120
- )
121
- )
122
- end
123
-
124
- protected
125
-
126
- def ancestor_list(parent)
127
- ancestors = []
128
- while parent.is_a?(TagNode)
129
- ancestors << parent.tag_name
130
- parent = parent.parent
131
- end
132
- ancestors
133
- end
134
-
135
- def break_type(break_str)
136
- if break_str.length > 2
137
- :paragraph
138
- elsif break_str.length == 1
139
- :line_break
140
- elsif break_str == "\r\n"
141
- :line_break
142
- else
143
- :paragraph
144
- end
145
- end
146
-
147
- # Delete empty paragraphs and line breaks at the end of block-level elements
148
- def delete_junk_breaks!(node)
149
- node.children.reject! do |child|
150
- if child.is_a?(TagNode)
151
- if !child.children.empty?
152
- delete_junk_breaks!(child)
153
- false
154
- elsif child.tag_name == @schema.paragraph_tag_name
155
- # It's an empty paragraph tag
156
- true
157
- #elsif not node.is_a?(RootNode) and @schema.block_level?(node.tag_name) and child.tag_name == @schema.line_break_tag_name and node.children.last == child
158
- elsif @schema.block_level?(node.tag_name) and child.tag_name == @schema.line_break_tag_name and node.children.last == child
159
-           # It's a line break at the end of the block-level element
160
- true
161
- else
162
- false
163
- end
164
- else
165
- false
166
- end
167
- end
168
- node
169
- end
170
-
171
- # The schema defines some tags that may not be empty. This method removes any such empty tags from the tree.
172
- def delete_invalid_empty_tags!(node)
173
- node.children.reject! do |child|
174
- if child.is_a?(TagNode)
175
- if child.children.empty? and !@schema.tag_may_be_empty?(child.tag_name)
176
- true
177
- else
178
- delete_invalid_empty_tags!(child)
179
- false
180
- end
181
- end
182
- end
183
- node
184
- end
185
-
186
- def parse_str(str)
187
- tree = RootNode.new
188
- # Initially, we open a paragraph tag. If it turns out that the first thing we encounter
189
- # is a block-level element, no problem: we'll be calling promote_block_level_elements
190
- # later anyway.
191
- current_parent = TagNode.new(tree, @schema.paragraph_tag_name)
192
- tree << current_parent
193
- current_token = ''
194
- current_token_type = :unknown
195
- # It may seem naive to use each_byte. What about Unicode? So long as we're using UTF-8, none of the
196
- # BB Code control characters will appear as part of multibyte characters, because UTF-8 doesn't allow
197
- # the range 0x00-0x7F in multibyte chars. As for the multibyte characters themselves, yes, they will
198
- # be temporarily split up as we append bytes onto the text nodes. But as of yet, I haven't found
199
- # a way that this could cause a problem. The bytes always come back together again. (It would be a problem
200
- # if we tried to count the characters for some reason, but we don't do that.)
201
-
202
- # AQ: #each_byte doesn't work with ruby 1.9+, but luckily we have #each_char
203
- split_method = :each_byte
204
- split_method = :each_char if RUBY_VERSION.split('.')[1] > "8"
205
-
206
- block = Proc.new do |char|
207
- if split_method == :each_char
208
- # ruby 1.9
209
- char_code = char.ord
210
- else
211
- # ruby 1.8
212
- char_code = char
213
- char = char_code.chr
214
- end
215
-
216
- case current_token_type
217
- when :unknown
218
- case char
219
- when '['
220
- current_token_type = :possible_tag
221
- current_token << char
222
- when "\r", "\n"
223
- current_token_type = :break
224
- current_token << char
225
- else
226
- if current_parent.is_a?(RootNode)
227
- new_paragraph_tag = TagNode.new(current_parent, @schema.paragraph_tag_name)
228
- current_parent << new_paragraph_tag
229
- current_parent = new_paragraph_tag
230
- end
231
- current_token_type = :text
232
- current_token << char
233
- end
234
- when :text
235
- case char
236
- when "["
237
- if @schema.text_valid_in_context?(*ancestor_list(current_parent))
238
- current_parent << TextNode.new(current_parent, current_token)
239
- end
240
- current_token = '['
241
- current_token_type = :possible_tag
242
- when "\r", "\n"
243
- if @schema.text_valid_in_context?(*ancestor_list(current_parent))
244
- current_parent << TextNode.new(current_parent, current_token)
245
- end
246
- current_token = char
247
- current_token_type = :break
248
- else
249
- current_token << char
250
- end
251
- when :break
252
- if char_code == CR_CODE or char_code == LF_CODE
253
- current_token << char
254
- else
255
- if break_type(current_token) == :paragraph
256
- while current_parent.is_a?(TagNode) and !@schema.block_level?(current_parent.tag_name) and current_parent.tag_name != @schema.paragraph_tag_name
257
- current_parent = current_parent.parent
258
- end
259
- # The current parent might be a paragraph tag, in which case we should move up one more level.
260
- # Otherwise, it might be a block-level element or a root node, in which case we should not move up.
261
- if current_parent.is_a?(TagNode) and current_parent.tag_name == @schema.paragraph_tag_name
262
- current_parent = current_parent.parent
263
- end
264
- # Regardless of whether the current parent is a block-level element, we need to open a new paragraph.
265
- new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
266
- current_parent << new_paragraph_node
267
- current_parent = new_paragraph_node
268
- else # line break
269
- prev_sibling = current_parent.children.last
270
- if prev_sibling.is_a?(TagNode) and @schema.block_level?(prev_sibling.tag_name)
271
- # Although the input only contains a single newline, we should
272
- # interpret it as the start of a new paragraph, because the last
273
- # thing we encountered was a block-level element.
274
- new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
275
- current_parent << new_paragraph_node
276
- current_parent = new_paragraph_node
277
- elsif @schema.tag(@schema.line_break_tag_name).valid_in_context?(*ancestor_list(current_parent))
278
- current_parent << TagNode.new(current_parent, @schema.line_break_tag_name)
279
- end
280
- end
281
- if char == '['
282
- current_token = '['
283
- current_token_type = :possible_tag
284
- else
285
- current_token = char
286
- current_token_type = :text
287
- end
288
- end
289
- when :possible_tag
290
- case char
291
- when '['
292
- current_parent << TextNode.new(current_parent, '[')
293
- # No need to reset current_token or current_token_type, because now we're in a new possible tag
294
- when '/'
295
- current_token_type = :closing_tag
296
- current_token << '/'
297
- when ':'
298
- current_token_type = :leaf_tag
299
- current_token << ':'
300
- else
301
- if tag_name_char?(char_code)
302
- current_token_type = :opening_tag
303
- current_token << char
304
- else
305
- current_token_type = :text
306
- current_token << char
307
- end
308
- end
309
- when :opening_tag
310
- if tag_name_char?(char_code) or char == '='
311
- current_token << char
312
- elsif char == ']'
313
- current_token << ']'
314
- tag_node = TagNode.from_opening_bb_code(current_parent, current_token)
315
- if @schema.block_level?(tag_node.tag_name) and current_parent.tag_name == @schema.paragraph_tag_name
316
- # If there is a line break before this, it's superfluous and should be deleted
317
- prev_sibling = current_parent.children.last
318
- if prev_sibling.is_a?(TagNode) and prev_sibling.tag_name == @schema.line_break_tag_name
319
- current_parent.children.pop
320
- end
321
- # Promote a block-level element
322
- current_parent = current_parent.parent
323
- tag_node.parent = current_parent
324
- current_parent << tag_node
325
- current_parent = tag_node
326
- # If all of this results in empty paragraph tags, no worries: they will be deleted later.
327
- elsif tag_node.tag_name == current_parent.tag_name and @schema.close_twins?(tag_node.tag_name)
328
- # The current tag and the tag we're now opening are of the same type, and this kind of tag auto-closes its twins
329
- # (E.g. * tags in the default config.)
330
- current_parent.parent << tag_node
331
- current_parent = tag_node
332
- elsif @schema.tag(tag_node.tag_name).valid_in_context?(*ancestor_list(current_parent))
333
- current_parent << tag_node
334
- current_parent = tag_node
335
- end # else, don't do anything--the tag is invalid and will be ignored
336
- if @schema.preformatted?(current_parent.tag_name)
337
- current_token_type = :preformatted
338
- current_parent.preformat!
339
- else
340
- current_token_type = :unknown
341
- end
342
- current_token = ''
343
- elsif char == "\r" or char == "\n"
344
- current_parent << TextNode.new(current_parent, current_token)
345
- current_token = char
346
- current_token_type = :break
347
- elsif current_token.include?('=')
348
- current_token << char
349
- else
350
- current_token_type = :text
351
- current_token << char
352
- end
353
- when :leaf_tag
354
- if tag_name_char?(char_code) or char == '='
355
- current_token << char
356
- elsif char == ']'
357
- current_token << ']'
358
- tag_node = TagNode.from_opening_bb_code(current_parent, current_token)
359
-
360
- if @schema.tag(tag_node.tag_name).valid_in_context?(*ancestor_list(current_parent))
361
- current_parent.children << tag_node
362
- current_token_type = :unknown
363
- current_token = ''
364
- else
365
- current_token_type = :text
366
- end
367
- end
368
- when :closing_tag
369
- if tag_name_char?(char_code)
370
- current_token << char
371
- elsif char == ']'
372
- original_parent = current_parent
373
- while current_parent.is_a?(TagNode) and current_parent.tag_name != current_token[2..-1]
374
- current_parent = current_parent.parent
375
- end
376
- if current_parent.is_a?(TagNode)
377
- current_parent = current_parent.parent
378
- else # current_parent is a RootNode
379
- # we made it to the top of the tree, and never found the tag to close
380
- # so we'll just ignore the closing tag altogether
381
- current_parent = original_parent
382
- end
383
- current_token_type = :unknown
384
- current_token = ''
385
- elsif char == "\r" or char == "\n"
386
- current_parent << TextNode.new(current_parent, current_token)
387
- current_token = char
388
- current_token_type = :break
389
- else
390
- current_token_type = :text
391
- current_token << char
392
- end
393
- when :preformatted
394
- if char == '['
395
- current_parent << TextNode.new(current_parent, current_token)
396
- current_token_type = :possible_preformatted_end
397
- current_token = '['
398
- else
399
- current_token << char
400
- end
401
- when :possible_preformatted_end
402
- current_token << char
403
- if current_token == "[/#{current_parent.tag_name}]" # Did we just see the closing tag for this preformatted element?
404
- current_parent = current_parent.parent
405
- current_token_type = :unknown
406
- current_token = ''
407
- elsif char == ']' # We're at the end of this opening/closing tag, and it's not the closing tag for the preformatted element
408
- current_parent << TextNode.new(current_parent, current_token)
409
- current_token_type = :preformatted
410
- current_token = ''
411
- end
412
- else
413
- raise "Unknown token type in state machine: #{current_token_type}"
414
- end
415
- end
416
-
417
- str.send(split_method, &block)
418
-
419
- # Handle whatever's left in the current token
420
- if current_token_type != :break and !current_token.empty?
421
- current_parent << TextNode.new(current_parent, current_token)
422
- end
423
- tree
424
- end
425
-
426
- def tag_name_char?(char_code)
427
- (char_code >= LOWER_A_CODE and char_code <= LOWER_Z_CODE) or (char_code >= UPPER_A_CODE and char_code <= UPPER_Z_CODE) or char_code.chr == '*'
428
- end
429
- end
430
- end
Binary file
data/rbbcode.gemspec DELETED
@@ -1,82 +0,0 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = %q{rbbcode}
8
- s.version = "0.1.11"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Jarrett Colby", "aq1018@gmail.com"]
12
- s.date = %q{2011-02-19}
13
- s.description = %q{RbbCode is a customizable Ruby library for parsing BB Code. RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.}
14
- s.email = %q{jarrett@jarrettcolby.com, aq1018@gmail.com}
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.markdown"
18
- ]
19
- s.files = [
20
- ".document",
21
- "Gemfile",
22
- "LICENSE.txt",
23
- "README.markdown",
24
- "Rakefile",
25
- "VERSION",
26
- "lib/rbbcode.rb",
27
- "lib/rbbcode/html_maker.rb",
28
- "lib/rbbcode/parser.rb",
29
- "lib/rbbcode/schema.rb",
30
- "lib/rbbcode/tree_maker.rb",
31
- "pkg/rbbcode-0.1.8.gem",
32
- "rbbcode.gemspec",
33
- "spec/html_maker_spec.rb",
34
- "spec/node_spec_helper.rb",
35
- "spec/parser_spec.rb",
36
- "spec/schema_spec.rb",
37
- "spec/spec_helper.rb",
38
- "spec/tree_maker_spec.rb"
39
- ]
40
- s.homepage = %q{http://github.com/jarrett/rbbcode}
41
- s.licenses = ["MIT"]
42
- s.require_paths = ["lib"]
43
- s.rubygems_version = %q{1.3.7}
44
- s.summary = %q{Ruby BB Code parser}
45
- s.test_files = [
46
- "spec/html_maker_spec.rb",
47
- "spec/node_spec_helper.rb",
48
- "spec/parser_spec.rb",
49
- "spec/schema_spec.rb",
50
- "spec/spec_helper.rb",
51
- "spec/tree_maker_spec.rb"
52
- ]
53
-
54
- if s.respond_to? :specification_version then
55
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
56
- s.specification_version = 3
57
-
58
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
59
- s.add_runtime_dependency(%q<sanitize-url>, [">= 0.1.3"])
60
- s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
61
- s.add_development_dependency(%q<bluecloth>, [">= 0"])
62
- s.add_development_dependency(%q<yard>, ["~> 0.6.0"])
63
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
64
- s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
65
- else
66
- s.add_dependency(%q<sanitize-url>, [">= 0.1.3"])
67
- s.add_dependency(%q<rspec>, ["~> 2.3.0"])
68
- s.add_dependency(%q<bluecloth>, [">= 0"])
69
- s.add_dependency(%q<yard>, ["~> 0.6.0"])
70
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
71
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
72
- end
73
- else
74
- s.add_dependency(%q<sanitize-url>, [">= 0.1.3"])
75
- s.add_dependency(%q<rspec>, ["~> 2.3.0"])
76
- s.add_dependency(%q<bluecloth>, [">= 0"])
77
- s.add_dependency(%q<yard>, ["~> 0.6.0"])
78
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
79
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
80
- end
81
- end
82
-
@@ -1,92 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
- require File.expand_path(File.dirname(__FILE__) + '/node_spec_helper')
5
-
6
- describe RbbCode::HtmlMaker do
7
- context '#make_html' do
8
- def expect_html(expected_html, &block)
9
- @html_maker.make_html(NodeBuilder.build(&block)).should == expected_html
10
- end
11
-
12
- before :each do
13
- @html_maker = RbbCode::HtmlMaker.new
14
- end
15
-
16
- it 'should replace simple BB code tags with HTML tags' do
17
- expect_html('<p>This is <strong>bold</strong> text</p>') do
18
- tag('p') do
19
- text 'This is '
20
- tag('b') { text 'bold' }
21
- text ' text'
22
- end
23
- end
24
- end
25
-
26
- it 'should work for nested tags' do
27
- expect_html('<p>This is <strong>bold and <u>underlined</u></strong> text</p>') do
28
- tag('p') do
29
- text 'This is '
30
- tag('b') do
31
- text 'bold and '
32
- tag('u') { text 'underlined' }
33
- end
34
- text ' text'
35
- end
36
- end
37
- end
38
-
39
- it 'should not allow JavaScript in URLs' do
40
- urls = [
41
- 'javascript:alert("1");',
42
- 'j a v a script:alert("2");',
43
- ' javascript:alert("3");',
44
- 'JavaScript:alert("4");',
45
- "java\nscript:alert(\"5\");",
46
- "java\rscript:alert(\"6\");"
47
- ]
48
-
49
- # url tag
50
- urls.each do |evil_url|
51
- expect_html('<p><a href="">foo</a></p>') do
52
- tag('p') do
53
- tag('url', evil_url) do
54
- text 'foo'
55
- end
56
- end
57
- end
58
- end
59
-
60
- # img tag
61
- urls.each do |evil_url, clean_url|
62
- expect_html("<p><img src=\"#{clean_url}\" alt=\"\"/></p>") do
63
- tag('p') do
64
- tag('img') do
65
- text evil_url
66
- end
67
- end
68
- end
69
- end
70
- end
71
-
72
- it 'should hex-encode double-quotes in the URL' do
73
- expect_html('<p><a href="http://example.com/foo%22bar">Foo</a></p>') do
74
- tag('p') do
75
- tag('url', 'http://example.com/foo"bar') do
76
- text 'Foo'
77
- end
78
- end
79
- end
80
- end
81
-
82
- it 'wraps preformatted tags in <pre>' do
83
- expect_html('<p><pre><code>Some code</code></pre></p>') do
84
- tag('p') do
85
- tag('code', nil, true) do
86
- text 'Some code'
87
- end
88
- end
89
- end
90
- end
91
- end
92
- end