rbbcode 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/{README → README.markdown} +14 -6
- data/lib/rbbcode/html_maker.rb +96 -0
- data/lib/rbbcode/parser.rb +32 -0
- data/lib/rbbcode/schema.rb +256 -0
- data/lib/rbbcode/tree_maker.rb +346 -0
- data/lib/rbbcode.rb +6 -0
- metadata +9 -4
data/{README → README.markdown}
RENAMED
@@ -1,3 +1,11 @@
|
|
1
|
+
The gem is fixed!
|
2
|
+
=================
|
3
|
+
|
4
|
+
Due to a defective gemspec, at least one version (0.1.3) wasn't packaging the lib directory in the .gem file. This has been corrected in 0.1.4. Sorry for the delay in fixing this.
|
5
|
+
|
6
|
+
About RbbCode
|
7
|
+
=============
|
8
|
+
|
1
9
|
RbbCode is a customizable Ruby library for parsing BB Code.
|
2
10
|
|
3
11
|
RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.
|
@@ -19,18 +27,18 @@ You can customize RbbCode by subclassing HtmlMaker and/or by passing configurati
|
|
19
27
|
|
20
28
|
HtmlMaker can be extended by adding methods like this:
|
21
29
|
|
22
|
-
class MyHtmlMaker < RbbCode::HtmlMaker
|
23
|
-
|
24
|
-
|
30
|
+
class MyHtmlMaker < RbbCode::HtmlMaker
|
31
|
+
def html_from_TAGNAME_tag(node)
|
32
|
+
# ...
|
33
|
+
end
|
25
34
|
end
|
26
|
-
end
|
27
35
|
|
28
36
|
...where TAGNAME should be replaced with the name of the tag. The method should accept an RbbCode::TagNode and return HTML as a string. (See tree_maker.rb for the definition of RbbCode::TagNode.) Anytime the parser encounters the specified tag, it will call your method and insert the returned HTML into the output.
|
29
37
|
|
30
38
|
Now you just have to tell the Parser object to use an instance of your custom subclass instead of the default HtmlMaker:
|
31
39
|
|
32
|
-
my_html_maker = MyHtmlMaker.new
|
33
|
-
parser = RbbCode::Parser.new(:html_maker => my_html_maker)
|
40
|
+
my_html_maker = MyHtmlMaker.new
|
41
|
+
parser = RbbCode::Parser.new(:html_maker => my_html_maker)
|
34
42
|
|
35
43
|
RbbCode removes invalid markup by comparing the input against a Schema object. The Schema is much like a DTD in XML. You can set your own rules and change the default ones by calling configuration methods on a Schema instance. Look at Schema#use_defaults in schema.rb for examples.
|
36
44
|
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# TODO: Lists must be surrounded by </p> and <p>
|
2
|
+
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
module RbbCode
  # Maps BB Code tag names to the HTML element names emitted by
  # HtmlMaker#map_tag_name. Tags that need special handling (img, url) are
  # rendered by dedicated html_from_*_tag methods instead and so do not
  # appear here.
  DEFAULT_TAG_MAPPINGS = {
    'p' => 'p',
    'br' => 'br',
    'b' => 'strong',
    'i' => 'em',
    'u' => 'u',
    'code' => 'code',
    'quote' => 'blockquote',
    'list' => 'ul',
    '*' => 'li'
  }

  # Turns a tree of RootNode / TagNode / TextNode objects into an HTML string.
  #
  # Subclasses customize the output for a tag by defining a method named
  # html_from_TAGNAME_tag(node) that returns the HTML for that node.
  class HtmlMaker
    # Renders +node+ and all of its descendants.
    #
    # node - a RbbCode::RootNode, RbbCode::TagNode or RbbCode::TextNode.
    #
    # Returns the HTML as a String.
    # Raises RuntimeError if the node is of any other class, or if a tag has
    # neither a custom html_from_*_tag method nor a default mapping.
    def make_html(node)
      output = ''.dup
      case node.class.to_s
      when 'RbbCode::RootNode'
        output << render_children(node)
      when 'RbbCode::TagNode'
        custom_tag_method = "html_from_#{node.tag_name}_tag"
        # The built-in handlers below are protected. Passing true makes
        # respond_to? consider non-public methods as well; without it the
        # handlers are invisible on Ruby >= 2.0 and img/url tags fall through
        # to map_tag_name, which raises.
        if respond_to?(custom_tag_method, true)
          output << send(custom_tag_method, node)
        else
          output << content_tag(map_tag_name(node.tag_name), render_children(node))
        end
      when 'RbbCode::TextNode'
        output << node.text
      else
        raise "Don't know how to make HTML from #{node.class}"
      end
      output
    end

    protected

    # Concatenates the HTML of every child of +node+.
    def render_children(node)
      node.children.map { |child| make_html(child) }.join
    end

    # Builds an HTML element.
    #
    # tag_name   - element name.
    # contents   - inner HTML; nil or empty produces a self-closing element.
    # attributes - Hash of attribute name => value. Values are inserted
    #              verbatim, so callers must pass values that are already safe
    #              inside a double-quoted attribute.
    #
    # Returns the element as a String.
    def content_tag(tag_name, contents, attributes = {})
      output = "<#{tag_name}"
      attributes.each do |attr, value|
        output << " #{attr}=\"#{value}\""
      end
      if contents.nil? || contents.empty?
        output << '/>'
      else
        output << ">#{contents}</#{tag_name}>"
      end
    end

    # Renders [img]URL[/img] as an <img> element with an empty alt attribute.
    def html_from_img_tag(node)
      src = sanitize_url(node.inner_bb_code)
      content_tag('img', nil, {'src' => src, 'alt' => ''})
    end

    # Renders [url]URL[/url] and [url=URL]text[/url] as an <a> element. The
    # tag value is used as the target when present, otherwise the tag's inner
    # BB Code is.
    def html_from_url_tag(node)
      url = sanitize_url(node.value.nil? ? node.inner_bb_code : node.value)
      content_tag('a', render_children(node), {'href' => url})
    end

    # Looks up the HTML element name for a BB Code tag name.
    #
    # Raises RuntimeError if DEFAULT_TAG_MAPPINGS has no entry for +tag_name+.
    def map_tag_name(tag_name)
      unless DEFAULT_TAG_MAPPINGS.has_key?(tag_name)
        raise "No tag mapping for '#{tag_name}'"
      end
      DEFAULT_TAG_MAPPINGS[tag_name]
    end

    # Returns a copy of +url+ made safe for use in a double-quoted attribute.
    # The argument itself is never modified, so node values and frozen strings
    # can be passed in safely.
    def sanitize_url(url)
      # Prepend a protocol if there isn't one. \A anchors at the start of the
      # whole string; ^ would also match after an embedded newline.
      unless url.match(/\A[a-zA-Z]+:\/\//)
        url = 'http://' + url
      end
      # Replace all functional permutations of "javascript:" with a
      # percent-encoded version of the same.
      neutralized = url.gsub(/(\s*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*):/i) do
        Regexp.last_match(1).unpack('C*').map { |byte| '%%%02X' % byte }.join + '%3A'
      end
      neutralized.gsub('"', '%22')
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RbbCode
  # Entry point of the library: converts a BB Code string to HTML by running
  # it through a tree maker and then an HTML maker.
  class Parser
    # config - Hash of optional collaborators. Recognized keys are :schema,
    #          :tree_maker and :html_maker; anything not supplied is created
    #          with its default class when #parse runs.
    #
    # Raises ArgumentError if config contains a key not listed by
    # known_options.
    def initialize(config = {})
      config.each_key do |key|
        raise(ArgumentError, "Unknown option #{key}") unless known_options.include?(key)
      end
      @config = config
    end

    # Converts the BB Code in +str+ to HTML.
    #
    # Angle brackets in the input are escaped first, so raw HTML typed by the
    # user is shown as text rather than interpreted as markup.
    #
    # Returns whatever the HTML maker's make_html returns for the tree.
    def parse(str)
      str = escape_html_tags(str)

      schema = @config[:schema] || RbbCode::Schema.new

      tree_maker = @config[:tree_maker] || RbbCode::TreeMaker.new(schema)
      tree = tree_maker.make_tree(str)

      html_maker = @config[:html_maker] || RbbCode::HtmlMaker.new
      html_maker.make_html(tree)
    end

    protected

    # Returns a copy of +str+ with < and > replaced by their HTML entities.
    def escape_html_tags(str)
      str.gsub('<', '&lt;').gsub('>', '&gt;')
    end

    # The option keys accepted by #initialize.
    def known_options
      [:schema, :tree_maker, :html_maker]
    end
  end
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
module RbbCode
  # Tags a new Schema allows before any customization.
  DEFAULT_ALLOWED_TAGS = [
    'p',
    'br',
    'b',
    'i',
    'u',
    'url',
    'img',
    'code',
    'quote',
    'list',
    '*'
  ]

  # Tags Schema#block_level? reports as block-level.
  DEFAULT_BLOCK_LEVEL_ELEMENTS = [
    'quote',
    'list',
    '*'
  ]

  # Base class for the small objects returned by Schema#tag and Schema#text.
  class SchemaNode
    def initialize(schema)
      @schema = schema
    end

    protected

    # Lets callers pass ancestors either as separate arguments or as a single
    # array: a one-element list holding an array is unwrapped.
    def normalize_ancestors(ancestors)
      if ancestors.length == 1 && ancestors[0].is_a?(Array)
        ancestors = ancestors[0]
      end
      ancestors
    end
  end

  # Chainable interface for setting rules about one tag. Every rule method
  # forwards to the owning Schema and returns self, so calls can be chained.
  class SchemaTag < SchemaNode
    def initialize(schema, name)
      super(schema)
      @name = name
    end

    def may_be_nested
      @schema.allow_descent(@name, @name)
      self
    end

    def may_contain_text
      @schema.allow_text(@name)
      self
    end

    def may_not_be_empty
      @schema.forbid_emptiness(@name)
      self
    end

    def may_not_be_nested
      @schema.forbid_descent(@name, @name)
      self
    end

    def may_descend_from(tag_name)
      @schema.allow_descent(tag_name, @name)
      self
    end

    def may_only_be_parent_of(*tag_names)
      @schema.forbid_children_except(@name, *tag_names)
      self
    end

    def may_not_contain_text
      @schema.forbid_text(@name)
      self
    end

    def may_not_descend_from(tag_name)
      @schema.forbid_descent(tag_name, @name)
      self
    end

    def must_be_child_of(*tag_names)
      @schema.require_parents(tag_names, @name)
      self
    end

    def must_be_empty
      @schema.forbid_children_except(@name, [])
      may_not_contain_text
      self
    end

    def need_not_be_child_of(tag_name)
      @schema.unrequire_parent(tag_name, @name)
      self
    end

    # Returns true if this tag is valid in the context defined by its list of
    # ancestors. ancestors should be ordered from most recent ancestor to most
    # distant, and may be given as separate arguments or as one array.
    def valid_in_context?(*ancestors)
      @schema.tag_valid_in_context?(@name, normalize_ancestors(ancestors))
    end
  end

  # Interface for asking whether text is allowed in a given context.
  class SchemaText < SchemaNode
    # ancestors may be given as separate arguments or as one array, ordered
    # from most recent ancestor to most distant.
    def valid_in_context?(*ancestors)
      @schema.text_valid_in_context?(normalize_ancestors(ancestors))
    end
  end

  # Holds the rules about which tags are allowed and where. Tag names are
  # stored as strings; every method converts the names it is given with to_s.
  class Schema
    # Removes a rule previously added with forbid_descent, if there is one.
    def allow_descent(ancestor, descendant) #:nodoc:
      forbidden = @forbidden_descent[descendant.to_s]
      forbidden.delete(ancestor.to_s) if forbidden
    end

    def allow_emptiness(tag_name)
      @never_empty.delete(tag_name.to_s)
    end

    # Adds each of the given tag names to the allowed list, skipping names
    # that are already present.
    def allow_tag(*tag_names)
      tag_names.each do |tag_name|
        unless @allowed_tags.include?(tag_name.to_s)
          @allowed_tags << tag_name.to_s
        end
      end
    end

    def allow_text(tag_name)
      @no_text.delete(tag_name.to_s)
    end

    def block_level?(tag_name)
      DEFAULT_BLOCK_LEVEL_ELEMENTS.include?(tag_name.to_s)
    end

    alias_method :allow_tags, :allow_tag

    # Removes every rule and every allowed tag, leaving an empty schema.
    def clear
      @allowed_tags = []
      @never_empty = []
      @forbidden_descent = {}
      @required_parents = {}
      @child_requirements = {}
      @no_text = []
    end

    # Restricts the direct children of +parent+ to the given tag names. The
    # names may be passed as separate arguments or as a single array; an empty
    # list forbids all child tags.
    def forbid_children_except(parent, *children)
      @child_requirements[parent.to_s] = children.flatten.collect { |c| c.to_s }
    end

    # Forbids +descendant+ from appearing anywhere below +ancestor+.
    def forbid_descent(ancestor, descendant) #:nodoc:
      @forbidden_descent[descendant.to_s] ||= []
      unless @forbidden_descent[descendant.to_s].include?(ancestor.to_s)
        @forbidden_descent[descendant.to_s] << ancestor.to_s
      end
    end

    def forbid_emptiness(tag_name)
      @never_empty << tag_name.to_s unless @never_empty.include?(tag_name.to_s)
    end

    def forbid_tag(name)
      @allowed_tags.delete(name.to_s)
    end

    def forbid_text(tag_name)
      @no_text << tag_name.to_s unless @no_text.include?(tag_name.to_s)
    end

    # Starts from DEFAULT_ALLOWED_TAGS and applies the rules in use_defaults.
    def initialize
      @allowed_tags = DEFAULT_ALLOWED_TAGS.dup
      @forbidden_descent = {}
      @required_parents = {}
      @child_requirements = {}
      @never_empty = []
      @no_text = []
      use_defaults
    end

    def line_break_tag_name
      'br'
    end

    def paragraph_tag_name
      'p'
    end

    # Requires the direct parent of +child+ to be one of +parents+, replacing
    # any earlier requirement. Descent rules that would contradict the new
    # requirement are dropped.
    def require_parents(parents, child) #:nodoc:
      parent_names = parents.collect { |p| p.to_s }
      @required_parents[child.to_s] = parent_names
      forbidden = @forbidden_descent[child.to_s]
      parent_names.each { |parent| forbidden.delete(parent) } if forbidden
    end

    def tag(name)
      SchemaTag.new(self, name)
    end

    def tag_may_be_empty?(tag_name)
      !@never_empty.include?(tag_name.to_s)
    end

    # Returns true if +tag_name+ may appear below +ancestors+, which is ordered
    # from most recent ancestor to most distant (empty at the top level).
    def tag_valid_in_context?(tag_name, ancestors)
      name = tag_name.to_s
      lineage = Array(ancestors).collect { |a| a.to_s }
      parent = lineage.first.to_s

      return false unless @allowed_tags.include?(name)
      if @required_parents.has_key?(name) && !@required_parents[name].include?(parent)
        return false
      end
      if @child_requirements.has_key?(parent) && !@child_requirements[parent].include?(name)
        return false
      end
      forbidden = @forbidden_descent[name] || []
      (forbidden & lineage).empty?
    end

    def text
      SchemaText.new(self)
    end

    # Returns true unless the most recent ancestor forbids text. ancestors may
    # be given as separate arguments or as a single array, ordered from most
    # recent ancestor to most distant.
    def text_valid_in_context?(*ancestors)
      !@no_text.include?(ancestors.flatten.first.to_s)
    end

    # Drops the parent requirement for +child+ entirely; +parent+ is accepted
    # but not consulted.
    def unrequire_parent(parent, child)
      @required_parents.delete(child.to_s)
    end

    # The default rules, intended to produce valid HTML output.
    def use_defaults
      tag('br').must_be_empty
      tag('p').may_not_be_nested
      tag('b').may_not_be_nested
      tag('b').may_not_be_empty
      tag('i').may_not_be_nested
      tag('i').may_not_be_empty
      tag('u').may_not_be_nested
      tag('u').may_not_be_empty
      tag('url').may_not_be_nested
      tag('img').may_not_be_nested
      tag('code').may_not_be_nested
      tag('*').must_be_child_of('list')
      tag('list').may_not_descend_from('p')
      tag('list').may_only_be_parent_of('*')
      tag('list').may_not_contain_text
    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
module RbbCode
  # Byte values of the characters the tokenizer cares about.
  module CharCodes
    CR_CODE = 13
    LF_CODE = 10

    L_BRACK_CODE = 91
    R_BRACK_CODE = 93
    SLASH_CODE = 47

    LOWER_A_CODE = 97
    LOWER_Z_CODE = 122

    UPPER_A_CODE = 65
    UPPER_Z_CODE = 90
  end

  # Base class for everything in the parse tree.
  class Node
    attr_accessor :children
    attr_accessor :parent

    def initialize(parent)
      @parent = parent
      @children = []
    end

    # Appends +child+ to this node's children.
    def <<(child)
      @children << child
    end
  end

  # The top of the tree. It has children but no parent.
  class RootNode < Node
    def initialize
      super(nil)
    end
  end

  # A run of literal text. Text nodes are leaves, so the child-related methods
  # inherited from Node are removed.
  class TextNode < Node
    undef_method :<<
    undef_method :children

    attr_accessor :text

    def initialize(parent, text)
      @parent = parent
      @text = text
    end

    def to_bb_code
      @text
    end
  end

  # A BB Code tag, with an optional value (the part after "=" in [tag=value]).
  class TagNode < Node
    attr_reader :tag_name
    attr_reader :value

    # Builds a node from a complete opening tag such as "[b]" or "[url=x]".
    # Everything between the first "=" and the closing bracket is the value.
    def self.from_opening_bb_code(parent, bb_code)
      equal_index = bb_code.index('=')
      if equal_index
        tag_name = bb_code[1, equal_index - 1]
        value = bb_code[(equal_index + 1)..-2]
      else
        tag_name = bb_code[1..-2]
        value = nil
      end
      new(parent, tag_name, value)
    end

    def initialize(parent, tag_name, value = nil)
      super(parent)
      @tag_name = tag_name
      @value = value
    end

    # The BB Code of this node's children, without this node's own tags.
    def inner_bb_code
      @children.map { |child| child.to_bb_code }.join
    end

    # This node and its children serialized back to BB Code.
    def to_bb_code
      opening = @value.nil? ? "[#{@tag_name}]" : "[#{@tag_name}=#{@value}]"
      "#{opening}#{inner_bb_code}[/#{@tag_name}]"
    end
  end

  # Tokenizes a BB Code string and builds a tree from it, consulting a schema
  # to decide which tags and text are allowed where.
  class TreeMaker
    include CharCodes

    def initialize(schema)
      @schema = schema
    end

    # Parses +str+ and returns the RootNode of the cleaned-up tree.
    def make_tree(str)
      delete_empty_paragraphs!(
        delete_invalid_empty_tags!(
          parse_str(str)
        )
      )
    end

    protected

    # Tag names of +parent+ and its TagNode ancestors, nearest first.
    def ancestor_list(parent)
      ancestors = []
      while parent.is_a?(TagNode)
        ancestors << parent.tag_name
        parent = parent.parent
      end
      ancestors
    end

    # Classifies a run of CR/LF characters: a single character or a single
    # "\r\n" pair is a :line_break, anything else is a :paragraph break.
    def break_type(break_str)
      if break_str.length == 1 || break_str == "\r\n"
        :line_break
      else
        :paragraph
      end
    end

    # Removes paragraph tags that have no children. Returns +node+.
    def delete_empty_paragraphs!(node)
      node.children.reject! do |child|
        next false unless child.is_a?(TagNode)
        if child.children.empty?
          child.tag_name == @schema.paragraph_tag_name
        else
          delete_empty_paragraphs!(child)
          false
        end
      end
      node
    end

    # The schema defines some tags that may not be empty. This method removes
    # any such empty tags from the tree. Returns +node+.
    def delete_invalid_empty_tags!(node)
      node.children.reject! do |child|
        next false unless child.is_a?(TagNode)
        if child.children.empty? && !@schema.tag_may_be_empty?(child.tag_name)
          true
        else
          delete_invalid_empty_tags!(child)
          false
        end
      end
      node
    end

    # Appends a new paragraph tag to +parent+ and returns it.
    def open_paragraph(parent)
      paragraph = TagNode.new(parent, @schema.paragraph_tag_name)
      parent << paragraph
      paragraph
    end

    # Appends +text+ to +parent+ as a TextNode if the schema allows text there;
    # otherwise the text is dropped.
    def append_text_if_allowed(parent, text)
      return unless @schema.text_valid_in_context?(*ancestor_list(parent))
      parent << TextNode.new(parent, text)
    end

    # Runs the tokenizer state machine over +str+ and returns the raw tree.
    #
    # States: :unknown (between tokens), :text, :break (a run of CR/LF),
    # :possible_tag (just saw "["), :opening_tag and :closing_tag.
    def parse_str(str)
      tree = RootNode.new
      # Initially, we open a paragraph tag. If it turns out that the first
      # thing we encounter is a block-level element, no problem: it is promoted
      # out of the paragraph below, and the empty paragraph is deleted later.
      current_parent = open_paragraph(tree)
      current_token = ''.dup
      current_token_type = :unknown
      # It may seem naive to use each_byte. What about Unicode? So long as
      # we're using UTF-8, none of the BB Code control characters will appear
      # as part of multibyte characters, because UTF-8 doesn't allow the range
      # 0x00-0x7F in multibyte chars. The multibyte characters themselves are
      # temporarily split up as we append bytes onto the tokens, but the bytes
      # always come back together again.
      # NOTE(review): on Ruby >= 1.9, Integer#chr returns a binary-encoded
      # string for bytes >= 0x80, so tokens holding non-ASCII input probably
      # end up binary-encoded -- confirm whether callers need force_encoding.
      str.each_byte do |char_code|
        char = char_code.chr
        case current_token_type
        when :unknown
          case char
          when '['
            current_token_type = :possible_tag
            current_token << char
          when "\r", "\n"
            current_token_type = :break
            current_token << char
          else
            # Text directly under the root gets wrapped in a paragraph.
            current_parent = open_paragraph(current_parent) if current_parent.is_a?(RootNode)
            current_token_type = :text
            current_token << char
          end
        when :text
          case char
          when '['
            append_text_if_allowed(current_parent, current_token)
            current_token = '['.dup
            current_token_type = :possible_tag
          when "\r", "\n"
            append_text_if_allowed(current_parent, current_token)
            current_token = char
            current_token_type = :break
          else
            current_token << char
          end
        when :break
          # Compare the byte value, not the one-character string: a String
          # never equals an Integer, which would end the break at every "\r".
          if char_code == CR_CODE || char_code == LF_CODE
            current_token << char
          else
            if break_type(current_token) == :paragraph
              # Climb out of inline tags until we reach a paragraph, a
              # block-level element or the root.
              while current_parent.is_a?(TagNode) &&
                    !@schema.block_level?(current_parent.tag_name) &&
                    current_parent.tag_name != @schema.paragraph_tag_name
                current_parent = current_parent.parent
              end
              # The current parent might be a paragraph tag, in which case we
              # should move up one more level. Otherwise, it might be a
              # block-level element or a root node, in which case we should
              # not move up.
              if current_parent.is_a?(TagNode) && current_parent.tag_name == @schema.paragraph_tag_name
                current_parent = current_parent.parent
              end
              # Regardless of whether the current parent is a block-level
              # element, we need to open a new paragraph.
              current_parent = open_paragraph(current_parent)
            else # line break
              prev_sibling = current_parent.children.last
              if prev_sibling.is_a?(TagNode) && @schema.block_level?(prev_sibling.tag_name)
                # Although the input only contains a single newline, we should
                # interpret it as the start of a new paragraph, because the
                # last thing we encountered was a block-level element.
                current_parent = open_paragraph(current_parent)
              elsif @schema.tag(@schema.line_break_tag_name).valid_in_context?(*ancestor_list(current_parent))
                current_parent << TagNode.new(current_parent, @schema.line_break_tag_name)
              end
            end
            if char == '['
              current_token = '['.dup
              current_token_type = :possible_tag
            else
              current_token = char
              current_token_type = :text
            end
          end
        when :possible_tag
          case char
          when '['
            current_parent << TextNode.new(current_parent, '[')
            # No need to reset current_token or current_token_type
          when '/'
            current_token_type = :closing_tag
            current_token << '/'
          else
            current_token_type = tag_name_char?(char_code) ? :opening_tag : :text
            current_token << char
          end
        when :opening_tag
          if tag_name_char?(char_code) || char == '='
            current_token << char
          elsif char == ']'
            current_token << ']'
            tag_node = TagNode.from_opening_bb_code(current_parent, current_token)
            # Only a TagNode has a tag_name; at the root we fall through to
            # the ordinary schema check instead of raising NoMethodError.
            # NOTE(review): the promotion branch does not consult
            # valid_in_context? -- confirm that this is intended.
            if @schema.block_level?(tag_node.tag_name) &&
               current_parent.is_a?(TagNode) &&
               current_parent.tag_name == @schema.paragraph_tag_name
              # If there is a line break before this, it's superfluous and
              # should be deleted
              prev_sibling = current_parent.children.last
              if prev_sibling.is_a?(TagNode) && prev_sibling.tag_name == @schema.line_break_tag_name
                current_parent.children.pop
              end
              # Promote a block-level element out of the paragraph
              current_parent = current_parent.parent
              tag_node.parent = current_parent
              current_parent << tag_node
              current_parent = tag_node
              # If all of this results in empty paragraph tags, no worries:
              # they will be deleted later.
            elsif @schema.tag(tag_node.tag_name).valid_in_context?(*ancestor_list(current_parent))
              current_parent << tag_node
              current_parent = tag_node
            end # else, don't do anything--the tag is invalid and will be ignored
            current_token_type = :unknown
            current_token = ''.dup
          elsif char == "\r" || char == "\n"
            current_parent << TextNode.new(current_parent, current_token)
            current_token = char
            current_token_type = :break
          elsif current_token.include?('=')
            # Inside a tag value, any other character is accepted.
            current_token << char
          else
            current_token_type = :text
            current_token << char
          end
        when :closing_tag
          if tag_name_char?(char_code)
            current_token << char
          elsif char == ']'
            original_parent = current_parent
            # current_token is "[/name" at this point
            closing_name = current_token[2..-1]
            while current_parent.is_a?(TagNode) && current_parent.tag_name != closing_name
              current_parent = current_parent.parent
            end
            if current_parent.is_a?(TagNode)
              current_parent = current_parent.parent
            else # current_parent is a RootNode
              # we made it to the top of the tree, and never found the tag to
              # close so we'll just ignore the closing tag altogether
              current_parent = original_parent
            end
            current_token_type = :unknown
            current_token = ''.dup
          elsif char == "\r" || char == "\n"
            current_parent << TextNode.new(current_parent, current_token)
            current_token = char
            current_token_type = :break
          else
            current_token_type = :text
            current_token << char
          end
        end
      end
      # Handle whatever's left in the current token
      if current_token_type != :break && !current_token.empty?
        current_parent << TextNode.new(current_parent, current_token)
      end
      tree
    end

    # True for the bytes allowed in a tag name: ASCII letters and "*".
    def tag_name_char?(char_code)
      (char_code >= LOWER_A_CODE && char_code <= LOWER_Z_CODE) ||
        (char_code >= UPPER_A_CODE && char_code <= UPPER_Z_CODE) ||
        char_code.chr == '*'
    end
  end
end
|
data/lib/rbbcode.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbcode
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jarrett Colby
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-02-17 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -20,9 +20,14 @@ executables: []
|
|
20
20
|
extensions: []
|
21
21
|
|
22
22
|
extra_rdoc_files:
|
23
|
-
- README
|
23
|
+
- README.markdown
|
24
24
|
files:
|
25
|
-
-
|
25
|
+
- lib/rbbcode.rb
|
26
|
+
- lib/rbbcode/html_maker.rb
|
27
|
+
- lib/rbbcode/parser.rb
|
28
|
+
- lib/rbbcode/schema.rb
|
29
|
+
- lib/rbbcode/tree_maker.rb
|
30
|
+
- README.markdown
|
26
31
|
has_rdoc: true
|
27
32
|
homepage: http://github.com/jarrett/rbbcode
|
28
33
|
licenses: []
|