ruby-bbcode-to-md 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,132 @@
1
module RubyBBCode
  # TagInfo is what one match of the regex scan gets converted into during
  # TagSifter#process_text.
  #
  # It wraps the parsed element data (a Hash, see #find_tag_info) together
  # with the tag dictionary, and exists mostly to give readable names to the
  # many logic conditions that are checked while sifting.
  class TagInfo
    # tag_data   - the Hash built by #find_tag_info for this element.
    # dictionary - the tag dictionary handed to the constructor; exposed
    #              because it is needed for reassigning multi_tag elements.
    attr_reader :tag_data, :dictionary

    # The dictionary entry for this element's tag. Stays nil for plain text
    # and for tags that are not present in the dictionary.
    attr_accessor :definition

    # tag_info   - one capture array as yielded by String#scan (see
    #              #find_tag_info for the positions that are read).
    # dictionary - collection of tag definitions, looked up by tag Symbol.
    def initialize(tag_info, dictionary)
      @tag_data = find_tag_info(tag_info)
      @dictionary = dictionary
      @definition = @dictionary[@tag_data[:tag].to_sym] unless @tag_data[:tag].nil?
    end

    # Reads a value from the element data.
    def [](key)
      @tag_data[key]
    end

    # Writes a value into the element data.
    def []=(key, value)
      @tag_data[key] = value
    end

    # The text value of the element (if it's not a tag element), with
    # Windows line endings ("\r\n") normalized to "\n".
    #
    # The normalized value is stored back into the element data so that later
    # reads of self[:text] see it too, but the original String object is left
    # untouched (so frozen strings are fine).
    #
    # Returns the normalized String, or nil when the element has no text.
    def text
      raw = @tag_data[:text]
      return nil if raw.nil?

      @tag_data[:text] = raw.gsub("\r\n", "\n")
    end

    # Allows for a very snazzy case/when conditional.
    #
    # Returns :opening_tag, :text or :closing_tag (nil if none applies).
    def type
      return :opening_tag if element_is_opening_tag?
      return :text if element_is_text?
      return :closing_tag if element_is_closing_tag?
    end

    # Turns a tag element whose tag is not in the dictionary into a text
    # element whose text is the complete original match (e.g. "[foo]").
    def handle_unregistered_tags_as_text
      return unless element_is_tag? && tag_missing_from_tag_dictionary?

      # Handle as text from now on!
      self[:is_tag] = false
      self[:closing_tag] = false
      self[:text] = self[:complete_match]
    end

    def element_is_tag?
      self[:is_tag]
    end

    def element_is_opening_tag?
      self[:is_tag] && !self[:closing_tag]
    end

    def element_is_closing_tag?
      self[:closing_tag]
    end

    def element_is_text?
      !self[:text].nil?
    end

    # True when a "=value" parameter was supplied with the tag. Text elements
    # carry no :params Hash at all and therefore answer false.
    def has_params?
      params = self[:params]
      !params.nil? && !params[:tag_param].nil?
    end

    def tag_missing_from_tag_dictionary?
      !@dictionary.include?(self[:tag].to_sym)
    end

    # True when the definition has no :only_in restriction.
    def allowed_outside_parent_tags?
      @definition[:only_in].nil?
    end

    # True when the definition has an :only_in restriction.
    def constrained_to_within_parent_tags?
      !@definition[:only_in].nil?
    end

    # True when tag_symbol is listed in the definition's :only_in.
    def allowed_in(tag_symbol)
      @definition[:only_in].include?(tag_symbol)
    end

    def can_have_params?
      @definition[:allow_tag_param]
    end

    # Checks if the tag param matches the regex pattern defined in tags.rb
    # (the definition's :tag_param). True when it does NOT match.
    def invalid_param?
      self[:params][:tag_param].match(@definition[:tag_param]).nil?
    end

    protected

    # Converts one scan capture array into the element Hash.
    #
    # Positions read from tag_info:
    #   [0] complete match, [2] "/" when it is a closing tag,
    #   [3] tag name, [4] raw parameter part (e.g. "=value"), [8] plain text.
    #
    # Returns a Hash with :complete_match and :is_tag, plus either
    # :closing_tag/:tag/:params (tags) or :text (plain text).
    def find_tag_info(tag_info)
      ti = {}
      ti[:complete_match] = tag_info[0]
      ti[:is_tag] = tag_info[0].start_with?('[')

      if ti[:is_tag]
        ti[:closing_tag] = (tag_info[2] == '/')
        ti[:tag] = tag_info[3]
        ti[:params] = {}

        # to_s guards against a missing capture; only the "=value" form is
        # understood.
        raw_params = tag_info[4].to_s
        ti[:params][:tag_param] = raw_params[1..-1] if raw_params.start_with?('=')
        # FIXME: params in the " key=value" form (raw_params starting with a
        # space) are not parsed. Delete this or write a test to cover it and
        # implement it.
      else
        # Plain text
        ti[:text] = tag_info[8]
      end

      ti
    end
  end
end
@@ -0,0 +1,67 @@
1
module RubyBBCode
  # A TagNode wraps a single element Hash of the BBTree and represents either
  # an opening tag element or a text element, e.g.
  #
  #   { :is_tag => false, :text => "ITALLICS" }
  #   { :is_tag => true,  :tag => :i, :nodes => [] }
  #
  # TagInfo elements are essentially converted into these nodes, which are
  # later converted into output. (Presumably they live in the BBTree's
  # current node's children collection -- the original author was not sure
  # either.)
  class TagNode
    include ::RubyBBCode::DebugBBTree
    attr_accessor :element

    # element - the Hash describing this node (see the examples above).
    # nodes   - accepted for compatibility with existing callers; not used.
    def initialize(element, nodes = [])
      @element = element
    end

    # Reads a value from the wrapped element.
    def [](key)
      @element[key]
    end

    # Writes a value into the wrapped element.
    def []=(key, value)
      @element[key] = value
    end

    # Debugging/visualization purposes: :tag for tag nodes, :text otherwise.
    def type
      @element[:is_tag] ? :tag : :text
    end

    # The element's :parent_tag entry.
    def parent_type
      @element[:parent_tag]
    end

    # Checks to see if the parameter for the TagNode has NOT been set, i.e.
    # there is no :params entry or it holds no :tag_param.
    def param_not_set?
      params = @element[:params]
      params.nil? || params[:tag_param].nil?
    end

    # Check if the parameter for the TagNode is set.
    def param_set?
      !param_not_set?
    end

    # True only for tag nodes with at least one child; text nodes always
    # answer false.
    def has_children?
      return false if type == :text

      children.length > 0
    end

    # The :allow_tag_param flag of this node's definition.
    def allow_tag_param?
      definition[:allow_tag_param]
    end

    # Shows the tag definition for this TagNode as defined in tags.rb.
    def definition
      @element[:definition]
    end

    # The child nodes stored under the element's :nodes key.
    def children
      @element[:nodes]
    end

    # Easy way to set the tag_param value of the hash, which represents the
    # parameter supplied. Replaces the whole :params entry.
    def tag_param=(param)
      @element[:params] = { :tag_param => param }
    end
  end
end
@@ -0,0 +1,303 @@
1
module RubyBBCode
  # TagSifter is in charge of building up the BBTree with nodes as it parses
  # through the supplied text, such as "[b]hello world[/b]".
  #
  # Typical use: build a sifter, call #process_text, then consult #invalid? /
  # #errors and read the resulting tree from #bbtree.
  class TagSifter
    attr_reader :bbtree, :errors

    # Matches either one bracketed tag (optionally closing, optionally with a
    # parameter part) or a run of text containing no "[".
    # Written in extended mode, so the spaces in the pattern are insignificant.
    # TagInfo reads the scan captures by position, so the grouping must not
    # be changed without updating TagInfo as well.
    ELEMENT_REGEX = /((\[ (\/)? (\w+) ((=[^\[\]]+) | (\s\w+=\w+)* | ([^\]]*))? \]) | ([^\[]+))/ix

    # text_to_parse - the BBCode source String.
    # dictionary    - the dictionary for all the defined tags in tags.rb.
    # escape_html   - when true (default) "<", ">" and '"' in the text are
    #                 replaced by their HTML entities before parsing.
    def initialize(text_to_parse, dictionary, escape_html = true)
      @text = escape_html ? text_to_parse.gsub('<', '&lt;').gsub('>', '&gt;').gsub('"', "&quot;") : text_to_parse

      @dictionary = dictionary
      @bbtree = BBTree.new({:nodes => TagCollection.new}, dictionary)
      @ti = nil        # the TagInfo currently being processed
      @errors = false  # false while valid, otherwise an Array of messages
    end

    # True once any validation has recorded an error.
    def invalid?
      @errors != false
    end

    # Scans the text element by element, validates each element and adds it
    # to the bbtree. Processing stops at the first invalid element, leaving
    # the reason in #errors. After a complete scan it also checks that all
    # tags were closed and that the tree is not too large.
    def process_text
      @text.scan(ELEMENT_REGEX) do |tag_info|
        @ti = TagInfo.new(tag_info, @dictionary)

        @ti.handle_unregistered_tags_as_text # if the tag isn't in the @dictionary list, then treat it as text
        handle_closing_tags_that_are_multi_as_text_if_it_doesnt_match_the_latest_opener_tag_on_the_stack

        return unless valid_element?

        case @ti.type # Validation of tag succeeded, add to @bbtree.tags_list and/or bbtree
        when :opening_tag
          element = {:is_tag => true, :tag => @ti[:tag].to_sym, :definition => @ti.definition, :nodes => TagCollection.new }
          element[:params] = {:tag_param => get_formatted_element_params} if @ti.can_have_params? && @ti.has_params?
          @bbtree.build_up_new_tag(element)

          @bbtree.escalate_bbtree(element)
        when :text
          set_parent_tag_from_multi_tag_to_concrete! if @bbtree.current_node.definition && @bbtree.current_node.definition[:multi_tag] == true
          # NOTE: @ti.text is read before the :between handling below on purpose;
          # keep this order.
          element = {:is_tag => false, :text => @ti.text }
          if within_open_tag?
            tag = @bbtree.current_node.definition
            if tag[:require_between]
              @bbtree.current_node[:between] = get_formatted_element_params
              if candidate_for_using_between_as_param?
                use_between_as_tag_param # Did not specify tag_param, so use between text.
              end
              # Don't add this node to @bbtree.current_node.children: inside a
              # tag that requires between-text, the text is kept as :between
              # (and possibly as the tag param) instead of as a child node.
              next
            end
          end
          @bbtree.build_up_new_tag(element)
        when :closing_tag
          @bbtree.retrogress_bbtree
        end
      end # end of scan loop

      validate_all_tags_closed_off # TODO: consider automatically closing off all the tags... I think that's how the HTML 5 parser works too
      validate_stack_level_too_deep_potential
    end

    # Resolves the current (multi_tag) parent node to the concrete tag whose
    # :url_matches fits the current text. If the proper tag can't be matched,
    # the parent tag is treated as text instead.
    def set_parent_tag_from_multi_tag_to_concrete!
      proper_tag = get_proper_tag
      if proper_tag == :tag_not_found
        @bbtree.redefine_parent_tag_as_text

        @bbtree.nodes << TagNode.new(@ti.tag_data) # escalate the bbtree with this element as though it's regular text data...
        return
      end
      @bbtree.current_node[:definition] = @dictionary[proper_tag]
      @bbtree.current_node[:tag] = proper_tag
    end

    # Finds the first of the current node's :supported_tags that has a
    # :url_matches regex matching the current text.
    #
    # Returns the tag Symbol, or :tag_not_found.
    def get_proper_tag
      supported_tags = @bbtree.current_node[:definition][:supported_tags]
      text = @ti.tag_data[:text]

      found = supported_tags.find do |tag|
        @dictionary[tag][:url_matches].any? { |regex| regex =~ text }
      end
      found || :tag_not_found
    end

    # If the current element is a closing tag that doesn't match the latest
    # opener, and that opener is a multi_tag, the closing tag is downgraded
    # to plain text (its complete match becomes the text).
    def handle_closing_tags_that_are_multi_as_text_if_it_doesnt_match_the_latest_opener_tag_on_the_stack
      return unless @ti.element_is_closing_tag?
      return if @bbtree.current_node[:definition].nil?

      if parent_tag != @ti[:tag].to_sym && @bbtree.current_node[:definition][:multi_tag]
        @ti[:is_tag] = false
        @ti[:closing_tag] = false
        @ti[:text] = @ti.tag_data[:complete_match]
      end
    end

    private

    # This method allows us to format params if needed.
    #
    # For tag elements the supplied tag param is returned (specially formatted
    # for :youtube tags that accept params). For text elements the text is
    # returned, specially formatted when the current node's definition has
    # :url_matches.
    #
    # TODO: Maybe this kind of thing *could* be handled where the %between%
    # is sorted out and the output is generated, but knowing the parent node
    # here is helpful.
    def get_formatted_element_params
      if @ti[:is_tag]
        param = @ti[:params][:tag_param]
        if @ti.can_have_params? && @ti.has_params?
          # Perform special formatting for certain tags.
          # NOTE(original author): this line isn't ever used because the tag
          # definitions don't allow it; pay close attention to this case if
          # tags without the :require_between restriction are introduced.
          param = conduct_special_formatting(param) if @ti[:tag].to_sym == :youtube
        end
        param
      else # must be text... @ti[:is_tag] == false
        param = @ti[:text]
        # Perform special formatting for certain tags.
        param = conduct_special_formatting(param) if @bbtree.current_node.definition[:url_matches]
        param
      end
    end

    # Extracts the first capture group of the first regex that matches url.
    #
    # url           - the String to inspect.
    # regex_matches - regexes to try; defaults to the current node's
    #                 :url_matches (for testing purposes it can be forced in).
    #
    # Returns the captured id, or url itself when nothing matched (hopefully
    # it is already a valid id).
    def conduct_special_formatting(url, regex_matches = nil)
      regex_matches = @bbtree.current_node.definition[:url_matches] if regex_matches.nil?

      regex_matches.each do |regex|
        return Regexp.last_match(1) if url =~ regex
      end

      url
    end

    # Validates the current element; on failure @errors has been set.
    def valid_element?
      return false unless valid_text_or_opening_element?
      return false unless valid_closing_element?
      return false unless valid_param_supplied_as_text?
      true
    end

    def valid_text_or_opening_element?
      if @ti.element_is_text? || @ti.element_is_opening_tag?
        return false if validate_opening_tag == false
        return false if validate_constraints_on_child == false
      end
      true
    end

    # For opening tags: checks the tag is allowed where it appears and that a
    # supplied param matches the definition. Always true for other elements.
    def validate_opening_tag
      if @ti.element_is_opening_tag?
        allowed_here = @ti.allowed_outside_parent_tags? || (within_open_tag? && @ti.allowed_in(parent_tag.to_sym))
        unless allowed_here
          # Tag doesn't belong in the last opened tag
          throw_child_requires_specific_parent_error
          return false
        end

        if @ti.can_have_params? && @ti.has_params? && @ti.invalid_param?
          throw_invalid_param_error
          return false
        end
      end
      true
    end

    # When inside an open tag whose definition restricts its children, checks
    # that the current element is allowed there.
    def validate_constraints_on_child
      if within_open_tag? && parent_has_constraints_on_children?
        # Check if the found tag is allowed
        last_tag = @dictionary[parent_tag]
        allowed_tags = last_tag[:only_allow]

        # Non-blank text is only tolerated when the parent requires
        # between-text; tags must be listed in the parent's :only_allow.
        disallowed_text = !@ti[:is_tag] && last_tag[:require_between] != true && @ti[:text].lstrip != ""
        disallowed_tag = @ti[:is_tag] && !allowed_tags.include?(@ti[:tag].to_sym)

        if disallowed_text || disallowed_tag
          # Last opened tag does not allow tag
          throw_parent_prohibits_this_child_error
          return false
        end
      end
      true
    end

    # For closing tags: checks that the tag closes the latest opener and that
    # required between-text was supplied. Always true for other elements.
    def valid_closing_element?
      return true unless @ti.element_is_closing_tag?

      if parent_tag != @ti[:tag].to_sym
        @errors = ["Closing tag [/#{@ti[:tag]}] does not match [#{parent_tag}]"]
        return false
      end

      tag = @ti.definition
      if tag[:require_between] == true && @bbtree.current_node[:between].nil?
        @errors = ["No text between [#{@ti[:tag]}] and [/#{@ti[:tag]}] tags."]
        return false
      end

      true
    end

    # This validation is for text elements with between text
    # that might be construed as a param.
    # The validation code checks if the params match constraints
    # imposed by the node/tag/parent.
    def valid_param_supplied_as_text?
      tag = @bbtree.current_node.definition

      # this conditional ensures the validation is appropriate to this tag type
      if @ti.element_is_text? && within_open_tag? && tag[:require_between] && candidate_for_using_between_as_param?
        # check if valid
        if @ti[:text].match(tag[:tag_param]).nil?
          @errors = [tag[:tag_param_description].gsub('%param%', @ti[:text])]
          return false
        end
      end
      true
    end

    def validate_all_tags_closed_off
      # if we're still expecting a closing tag and we've come to the end of the string... throw error
      throw_unexpected_end_of_string_error if expecting_a_closing_tag?
    end

    def validate_stack_level_too_deep_potential
      throw_stack_level_will_be_too_deep_error if @bbtree.nodes.count > 2200
    end

    # The throw_* helpers do not raise; they record the message in @errors.

    def throw_child_requires_specific_parent_error
      err = "[#{@ti[:tag]}] can only be used in [#{@ti.definition[:only_in].to_sentence(to_sentence_bbcode_tags)}]"
      err += ", so using it in a [#{parent_tag}] tag is not allowed" if expecting_a_closing_tag?
      @errors = [err]
    end

    def throw_invalid_param_error
      @errors = [@ti.definition[:tag_param_description].gsub('%param%', @ti[:params][:tag_param])]
    end

    def throw_parent_prohibits_this_child_error
      allowed_tags = @dictionary[parent_tag][:only_allow]
      err = "[#{parent_tag}] can only contain [#{allowed_tags.to_sentence(to_sentence_bbcode_tags)}] tags, so "
      err += "[#{@ti[:tag]}]" if @ti[:is_tag]
      err += "\"#{@ti[:text]}\"" unless @ti[:is_tag]
      err += ' is not allowed'
      @errors = [err]
    end

    def throw_unexpected_end_of_string_error
      @errors = ["[#{@bbtree.tags_list.to_sentence(to_sentence_bbcode_tags)}] not closed"]
    end

    def throw_stack_level_will_be_too_deep_error
      @errors = ["Stack level would go too deep. You must be trying to process a text containing thousands of BBTree nodes at once. (limit around 2300 tags containing 2,300 strings). Check RubyBBCode::TagCollection#to_html to see why this validation is needed."]
    end

    # Options passed to #to_sentence so that a list of tags reads like
    # "a], [b] and [c" (the outer brackets are added by the caller).
    def to_sentence_bbcode_tags
      {:words_connector => "], [",
       :two_words_connector => "] and [",
       :last_word_connector => "] and ["}
    end

    def expecting_a_closing_tag?
      @bbtree.expecting_a_closing_tag?
    end

    def within_open_tag?
      @bbtree.within_open_tag?
    end

    # Stores the (formatted) between text as the current node's tag param.
    def use_between_as_tag_param
      param = get_formatted_element_params
      @bbtree.current_node.tag_param = param # @bbtree.current_node[:params] = {:tag_param => @ti[:text]}
    end

    # True when the current node's definition allows the between text to act
    # as its param and no param has been set on the node yet.
    #
    # TODO: the bool values are unclear and should be worked on. An additional
    # flag might be tag[:requires_param], such that [img] and [url] would have
    # it as true. As it is now, if a tag (say youtube) has
    # tag[:require_between] == true and tag[:allow_tag_param].nil?, then the
    # :between is assumed to be the param.
    def candidate_for_using_between_as_param?
      tag = @bbtree.current_node.definition
      tag[:allow_tag_param_between] && @bbtree.current_node.param_not_set?
    end

    def parent_tag
      @bbtree.parent_tag
    end

    def parent_has_constraints_on_children?
      @bbtree.parent_has_constraints_on_children?
    end
  end
end