RubyGems - jarrett-rbbcode - Versions diffs - 0.1.0 - Mend

jarrett-rbbcode 0.1.0

Files changed (16) hide show

data/MIT-LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+Copyright (c) 2004-2008 David Heinemeier Hansson
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README ADDED Viewed

@@ -0,0 +1,13 @@
+RbbCode is a customizable Ruby library for parsing BB Code.
+RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.
+Example usage:
+require 'rubygems'
+require 'rbbcode'
+bb_code = 'This is [b]bold[/b] text'
+parser = RbbCode::Parser.new
+html = parser.parse(bb_code)
+# => '<p>This is <strong>bold</strong> text</p>'

data/VERSION ADDED Viewed

@@ -0,0 +1 @@
+0.1.0

data/lib/rbbcode/html_maker.rb ADDED Viewed

@@ -0,0 +1,93 @@
+# TODO: Lists must be surrounded by </p> and <p>
+require 'cgi'
+module RbbCode
+	# Maps a BB Code tag name to the HTML element name that
+	# HtmlMaker#map_tag_name returns for it. Tags that HtmlMaker renders with a
+	# dedicated html_from_*_tag method (img, url) have no entry here.
+	DEFAULT_TAG_MAPPINGS = {
+		'p' => 'p',
+		'br' => 'br',
+		'b' => 'strong',
+		'i' => 'em',
+		'u' => 'u',
+		'code' => 'code',
+		'quote' => 'blockquote',
+		'list' => 'ul',
+		'*' => 'li'
+	}
+	class HtmlMaker
+		def make_html(node)
+			output = ''
+			case node.class.to_s
+			when 'RbbCode::RootNode'
+				node.children.each do |child|
+					output << make_html(child)
+				end
+			when 'RbbCode::TagNode'
+				custom_tag_method = "html_from_#{node.tag_name}_tag"
+				if respond_to?(custom_tag_method)
+					output << send(custom_tag_method, node)
+				else
+					inner_html = ''
+					node.children.each do |child|
+						inner_html << make_html(child)
+					end
+					output << content_tag(map_tag_name(node.tag_name), inner_html)
+				end
+			when 'RbbCode::TextNode'
+				output << node.text
+			else
+				raise "Don't know how to make HTML from #{node.class}"
+			end
+			output
+		end
+		protected
+		def content_tag(tag_name, contents, attributes = {})
+			output = "<#{tag_name}"
+			attributes.each do |attr, value|
+				output << " #{attr}=\"#{value}\""
+			end
+			if contents.nil? or contents.empty?
+				output << '/>'
+			else
+				output << ">#{contents}</#{tag_name}>"
+			end
+		end
+		def html_from_img_tag(node)
+			src = sanitize_url(node.inner_bb_code)
+			content_tag('img', nil, {'src' => src, 'alt' => ''})
+		end
+		def html_from_url_tag(node)
+			inner_bb_code = node.inner_bb_code
+			if node.value.nil?
+				url = inner_bb_code
+			else
+				url = node.value
+			end
+			url = sanitize_url(url)
+			content_tag('a', inner_bb_code, {'href' => url})
+		end
+		def map_tag_name(tag_name)
+			unless DEFAULT_TAG_MAPPINGS.has_key?(tag_name)
+				raise "No tag mapping for '#{tag_name}'"
+			end
+			DEFAULT_TAG_MAPPINGS[tag_name]
+		end
+		def sanitize_url(url)
+			# Prepend a protocol if there isn't one
+			unless url.match(/^[a-zA-Z]+:\/\//)
+				url = 'http://' + url
+			end
+			# Replace all functional permutations of "javascript:" with a hex-encoded version of the same
+			url.gsub(/(\s*j\s*\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*):/i) do |match_str|
+				CGI::escape($1) + '%3A'
+			end
+		end
+	end
+end

data/lib/rbbcode/parser.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module RbbCode
+	# Entry point of the library: turns a BB Code string into HTML.
+	#
+	# The config hash may supply :schema, :tree_maker, and :html_maker objects;
+	# any that are missing are replaced by default instances in #parse.
+	class Parser
+		def initialize(config = {})
+			@config = config
+		end
+		# Escapes < and > in str, builds a node tree from the result, and returns
+		# the HTML made from that tree.
+		def parse(str)
+			str = escape_html_tags(str)
+			schema = @config[:schema] || RbbCode::Schema.new
+			tree_maker = @config[:tree_maker] || RbbCode::TreeMaker.new(schema)
+			tree = tree_maker.make_tree(str)
+			html_maker = @config[:html_maker] || RbbCode::HtmlMaker.new
+			html_maker.make_html(tree)
+		end
+		protected
+		# Replaces < with &lt; and > with &gt;. No other characters are changed.
+		def escape_html_tags(str)
+			str.gsub('<', '&lt;').gsub('>', '&gt;')
+		end
+	end
+end

data/lib/rbbcode/schema.rb ADDED Viewed

@@ -0,0 +1,235 @@
+module RbbCode
+	# Tag names that a newly created Schema allows (Schema#initialize copies
+	# this list).
+	DEFAULT_ALLOWED_TAGS = [
+		'p',
+		'br',
+		'b',
+		'i',
+		'u',
+		'url',
+		'img',
+		'code',
+		'quote',
+		'list',
+		'*'
+	]
+	# Tag names for which Schema#block_level? returns true.
+	DEFAULT_BLOCK_LEVEL_ELEMENTS = [
+		'quote',
+		'list',
+		'*'
+	]
+	# Base class of the objects returned by Schema#tag and Schema#text. It keeps
+	# a reference to the schema the object belongs to.
+	class SchemaNode
+		def initialize(schema)
+			@schema = schema
+		end
+		protected
+		# Lets callers pass ancestors either as separate arguments or as a single
+		# array: when the splatted argument list holds exactly one array, that
+		# array is returned; otherwise the list is returned unchanged.
+		def normalize_ancestors(ancestors)
+			if ancestors.length == 1 and ancestors[0].is_a?(Array)
+				ancestors = ancestors[0]
+			end
+			ancestors
+		end
+	end
+	# Handle for configuring the rules of one tag, obtained from Schema#tag.
+	# Every rule method forwards to the schema and returns self, so calls can
+	# be chained.
+	class SchemaTag < SchemaNode
+		def initialize(schema, name)
+			@schema = schema
+			@name = name
+		end
+		# Allows this tag to appear inside another tag of the same name.
+		def may_be_nested
+			@schema.allow_descent(@name, @name)
+			self
+		end
+		# Allows text directly inside this tag.
+		def may_contain_text
+			@schema.allow_text(@name)
+			self
+		end
+		# Forbids this tag from appearing inside another tag of the same name.
+		def may_not_be_nested
+			@schema.forbid_descent(@name, @name)
+			self
+		end
+		# Lifts a restriction set by may_not_descend_from for tag_name.
+		def may_descend_from(tag_name)
+			@schema.allow_descent(tag_name, @name)
+			self
+		end
+		# Restricts the tags that may be direct children of this tag to tag_names.
+		def may_only_be_parent_of(*tag_names)
+			@schema.forbid_children_except(@name, *tag_names)
+			self
+		end
+		# Forbids text directly inside this tag.
+		def may_not_contain_text
+			@schema.forbid_text(@name)
+			self
+		end
+		# Forbids this tag anywhere beneath tag_name.
+		def may_not_descend_from(tag_name)
+			@schema.forbid_descent(tag_name, @name)
+			self
+		end
+		# Requires the direct parent of this tag to be one of tag_names.
+		def must_be_child_of(*tag_names)
+			@schema.require_parents(tag_names, @name)
+			self
+		end
+		# Forbids both child tags and text inside this tag.
+		def must_be_empty
+			@schema.forbid_children_except(@name, [])
+			may_not_contain_text
+			self
+		end
+		# Lifts a requirement set by must_be_child_of.
+		def need_not_be_child_of(tag_name)
+			@schema.unrequire_parent(tag_name, @name)
+			self
+		end
+		# Returns true if tag_name is valid in the context defined by its list of ancestors.
+		# ancestors should be ordered from most recent ancestor to most distant.
+		def valid_in_context?(*ancestors)
+			@schema.tag_valid_in_context?(@name, normalize_ancestors(ancestors))
+		end
+	end
+	# Handle for asking the schema about plain text, obtained from Schema#text.
+	class SchemaText < SchemaNode
+		# Asks the schema whether text is valid given the ancestors, which may be
+		# passed as separate arguments or as one array (see normalize_ancestors).
+		def valid_in_context?(*ancestors)
+			@schema.text_valid_in_context?(normalize_ancestors(ancestors))
+		end
+	end
+	class Schema
+		def allow_descent(ancestor, descendant) #:nodoc:
+			if @forbidden_descent.has_key?(descendant.to_s) and @forbidden_descent[descendant.to_s].include?(ancestor.to_s)
+				@forbidden_descent[descendant.to_s].delete(ancestor.to_s)
+			end
+		end
+		def allow_tag(*tag_names)
+			tag_names.each do |tag_name|
+				unless @allowed_tags.include?(tag_name.to_s)
+					@allowed_tags << tag_name.to_s
+				end
+			end
+		end
+		def allow_text(tag_name)
+			@no_text.delete(tag_name.to_s)
+		end
+		def block_level?(tag_name)
+			DEFAULT_BLOCK_LEVEL_ELEMENTS.include?(tag_name.to_s)
+		end
+		alias_method :allow_tags, :allow_tag
+		def clear
+			@allowed_tags = []
+			@forbidden_descent = {}
+			@required_parents = {}
+			@no_text = []
+		end
+		def forbid_children_except(parent, children)
+			@child_requirements[parent.to_s] = children.collect { |c| c.to_s }
+		end
+		def forbid_descent(ancestor, descendant) #:nodoc:
+			@forbidden_descent[descendant.to_s] ||= []
+			unless @forbidden_descent[descendant.to_s].include?(ancestor.to_s)
+				@forbidden_descent[descendant.to_s] << ancestor.to_s
+			end
+		end
+		def forbid_tag(name)
+			@allowed_tags.delete(name.to_s)
+		end
+		def forbid_text(tag_name)
+			@no_text << tag_name.to_s unless @no_text.include?(tag_name.to_s)
+		end
+		def initialize
+			@allowed_tags = DEFAULT_ALLOWED_TAGS.dup
+			@forbidden_descent = {}
+			@required_parents = {}
+			@child_requirements = {}
+			@no_text = []
+			use_defaults
+		end
+		def line_break_tag_name
+			'br'
+		end
+		def paragraph_tag_name
+			'p'
+		end
+		def require_parents(parents, child) #:nodoc:
+			@required_parents[child.to_s] = parents.collect { |p| p.to_s }
+			parents.each do |parent|
+				if @forbidden_descent.has_key?(child.to_s)
+					@forbidden_descent[child.to_s].delete(parent)
+				end
+			end
+		end
+		def tag(name)
+			SchemaTag.new(self, name)
+		end
+		def tag_valid_in_context?(tag_name, ancestors)
+			return false unless @allowed_tags.include?(tag_name.to_s)
+			if @required_parents.has_key?(tag_name.to_s) and !@required_parents[tag_name.to_s].include?(ancestors[0].to_s)
+				return false
+			end
+			if @child_requirements.has_key?(ancestors[0].to_s) and !@child_requirements[ancestors[0].to_s].include?(tag_name.to_s)
+				return false
+			end
+			if @forbidden_descent.has_key?(tag_name.to_s)
+				@forbidden_descent[tag_name.to_s].each do |forbidden_ancestor|
+					return false if ancestors.include?(forbidden_ancestor)
+				end
+			end
+			return true
+		end
+		def text
+			SchemaText.new(self)
+		end
+		def text_valid_in_context?(*ancestors)
+			if @no_text.include?(ancestors[0].to_s)
+				return false
+			end
+			return true
+		end
+		def unrequire_parent(parent, child)
+			@required_parents.delete(child.to_s)
+		end
+		def use_defaults
+			tag('br').must_be_empty
+			tag('p').may_not_be_nested
+			tag('b').may_not_be_nested
+			tag('i').may_not_be_nested
+			tag('u').may_not_be_nested
+			tag('url').may_not_be_nested
+			tag('img').may_not_be_nested
+			tag('code').may_not_be_nested
+			tag('p').may_not_be_nested
+			tag('*').must_be_child_of('list')
+			tag('list').may_not_descend_from('p')
+			tag('list').may_only_be_parent_of('*')
+			tag('list').may_not_contain_text
+		end
+	end
+end

data/lib/rbbcode/tree_maker.rb ADDED Viewed

@@ -0,0 +1,321 @@
+require 'pp'
+module RbbCode
+	# Byte values of the characters the tree maker tests for while scanning its
+	# input byte by byte.
+	module CharCodes
+		CR_CODE = 13
+		LF_CODE = 10
+		L_BRACK_CODE = 91
+		R_BRACK_CODE = 93
+		SLASH_CODE = 47
+		LOWER_A_CODE = 97
+		LOWER_Z_CODE = 122
+		UPPER_A_CODE = 65
+		UPPER_Z_CODE = 90
+	end
+	# Base class for the nodes of the parse tree: a parent reference plus an
+	# ordered list of children.
+	class Node
+		# Appends child to this node's children.
+		def << (child)
+			@children << child
+		end
+		attr_accessor :children
+		def initialize(parent)
+			@parent = parent
+			@children = []
+		end
+		attr_accessor :parent
+	end
+	# The top of a parse tree. It takes no parent argument and does not call
+	# Node#initialize, so its parent is never assigned.
+	class RootNode < Node
+		def initialize
+			@children = []
+		end
+	end
+	# A leaf node holding a run of plain text.
+	class TextNode < Node
+		# Text nodes cannot have children, so the child-related methods
+		# inherited from Node are removed.
+		undef_method '<<'.to_sym
+		undef_method :children
+		def initialize(parent, text)
+			@parent = parent
+			@text = text
+		end
+		attr_accessor :text
+		# The BB Code form of a text node is the text itself.
+		def to_bb_code
+			@text
+		end
+	end
+	# A node for a BB Code tag: a tag name, an optional value (the part after
+	# "=" in the opening tag), and child nodes.
+	class TagNode < Node
+		# Builds a node from the text of an opening tag, brackets included, such
+		# as "[b]" or "[url=http://example.com]". Everything between the first "="
+		# and the closing bracket becomes the value; without "=" the value is nil.
+		def self.from_opening_bb_code(parent, bb_code)
+			if equal_index = bb_code.index('=')
+				tag_name = bb_code[1, equal_index - 1]
+				value = bb_code[(equal_index + 1)..-2]
+			else
+				tag_name = bb_code[1..-2]
+				value = nil
+			end
+			new(parent, tag_name, value)
+		end
+		def initialize(parent, tag_name, value = nil)
+			super(parent)
+			@tag_name = tag_name
+			@value = value
+		end
+		# Returns the BB Code of all children concatenated, without this node's
+		# own opening and closing tags.
+		def inner_bb_code
+			@children.inject('') do |output, child|
+				output << child.to_bb_code
+			end
+		end
+		# Returns this node as BB Code: opening tag, contents, closing tag.
+		def to_bb_code
+			if @value.nil?
+				output = "[#{@tag_name}]"
+			else
+				output = "[#{@tag_name}=#{@value}]"
+			end
+			output << inner_bb_code << "[/#{@tag_name}]"
+		end
+		attr_reader :tag_name
+		attr_reader :value
+	end
+	class TreeMaker
+		include CharCodes
+		def initialize(schema)
+			@schema = schema
+		end
+		def make_tree(str)
+			delete_empty_paragraphs(parse_str(str))
+		end
+		protected
+		def ancestor_list(parent)
+			ancestors = []
+			while parent.is_a?(TagNode)
+				ancestors << parent.tag_name
+				parent = parent.parent
+			end
+			ancestors
+		end
+		def break_type(break_str)
+			if break_str.length > 2
+				:paragraph
+			elsif break_str.length == 1
+				:line_break
+			elsif break_str == "\r\n"
+				:line_break
+			else
+				:paragraph
+			end
+		end
+		def delete_empty_paragraphs(node)
+			node.children.reject! do |child|
+				if child.is_a?(TagNode)
+					if !child.children.empty?
+						delete_empty_paragraphs(child)
+						false
+					elsif child.tag_name == @schema.paragraph_tag_name
+						# It's an empty paragraph tag, so the reject! block should return true
+						true
+					else
+						false
+					end
+				else
+					false
+				end
+			end
+			node
+		end
+		def parse_str(str)
+			tree = RootNode.new
+			# Initially, we open a paragraph tag. If it turns out that the first thing we encounter
+			# is a block-level element, no problem: we'll be calling promote_block_level_elements
+			# later anyway.
+			current_parent = TagNode.new(tree, @schema.paragraph_tag_name)
+			tree << current_parent
+			current_token = ''
+			current_token_type = :unknown
+			str.each_byte do |char_code|
+				char = char_code.chr
+				case current_token_type
+				when :unknown
+					case char
+					when '['
+						current_token_type = :possible_tag
+						current_token << char
+					when "\r", "\n"
+						current_token_type = :break
+						current_token << char
+					else
+						if current_parent.is_a?(RootNode)
+							new_paragraph_tag = TagNode.new(current_parent, @schema.paragraph_tag_name)
+							current_parent << new_paragraph_tag
+							current_parent = new_paragraph_tag
+						end
+						current_token_type = :text
+						current_token << char
+					end
+				when :text
+					case char
+					when "["
+						if @schema.text_valid_in_context?(*ancestor_list(current_parent))
+							current_parent << TextNode.new(current_parent, current_token)
+						end
+						current_token = '['
+						current_token_type = :possible_tag
+					when "\r", "\n"
+						if @schema.text_valid_in_context?(*ancestor_list(current_parent))
+							current_parent << TextNode.new(current_parent, current_token)
+						end
+						current_token = char
+						current_token_type = :break
+					else
+						current_token << char
+					end
+				when :break
+					if char == CR_CODE or char_code == LF_CODE
+						current_token << char
+					else
+						if break_type(current_token) == :paragraph
+							while current_parent.is_a?(TagNode) and !@schema.block_level?(current_parent.tag_name) and current_parent.tag_name != @schema.paragraph_tag_name
+								current_parent = current_parent.parent
+							end
+							# The current parent might be a paragraph tag, in which case we should move up one more level.
+							# Otherwise, it might be a block-level element or a root node, in which case we should not move up.
+							if current_parent.is_a?(TagNode) and current_parent.tag_name == @schema.paragraph_tag_name
+								current_parent = current_parent.parent
+							end
+							# Regardless of whether the current parent is a block-level element, we need to open a new paragraph.
+							new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
+							current_parent << new_paragraph_node
+							current_parent = new_paragraph_node
+						else # line break
+							prev_sibling = current_parent.children.last
+							if prev_sibling.is_a?(TagNode) and @schema.block_level?(prev_sibling.tag_name)
+								# Although the input only contains a single newline, we should
+								# interpret is as the start of a new paragraph, because the last
+								# thing we encountered was a block-level element.
+								new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
+								current_parent << new_paragraph_node
+								current_parent = new_paragraph_node
+							elsif @schema.tag(@schema.line_break_tag_name).valid_in_context?(*ancestor_list(current_parent))
+								current_parent << TagNode.new(current_parent, @schema.line_break_tag_name)
+							end
+						end
+						if char == '['
+							current_token = '['
+							current_token_type = :possible_tag
+						else
+							current_token = char
+							current_token_type = :text
+						end
+					end
+				when :possible_tag
+					case char
+					when '['
+						current_parent << TextNode.new(current_parent, '[')
+						# No need to reset current_token or current_token_type
+					when '/'
+						current_token_type = :closing_tag
+						current_token << '/'
+					else
+						if tag_name_char?(char_code)
+							current_token_type = :opening_tag
+							current_token << char
+						elsif tag_name
+							current_token_type = :text
+							current_token << char
+						end
+					end
+				when :opening_tag
+					if tag_name_char?(char_code) or char == '='
+						current_token << char
+					elsif char == ']'
+						current_token << ']'
+						tag_node = TagNode.from_opening_bb_code(current_parent, current_token)
+						if @schema.block_level?(tag_node.tag_name) and current_parent.tag_name == @schema.paragraph_tag_name
+							# If there is a line break before this, it's superfluous and should be deleted
+							prev_sibling = current_parent.children.last
+							if prev_sibling.is_a?(TagNode) and prev_sibling.tag_name == @schema.line_break_tag_name
+								current_parent.children.pop
+							end
+							# Promote a block-level element
+							current_parent = current_parent.parent
+							tag_node.parent = current_parent
+							current_parent << tag_node
+							current_parent = tag_node
+							# If all of this results in empty paragraph tags, no worries: they will be deleted later.
+						elsif @schema.tag(tag_node.tag_name).valid_in_context?(*ancestor_list(current_parent))
+							current_parent << tag_node
+							current_parent = tag_node
+						end # else, don't do anything--the tag is invalid and will be ignored
+						current_token_type = :unknown
+						current_token = ''
+					elsif char == "\r" or char == "\n"
+						current_parent << TextNode.new(current_parent, current_token)
+						current_token = char
+						current_token_type = :break
+					elsif current_token.include?('=')
+						current_token << char
+					else
+						current_token_type = :text
+						current_token << char
+					end
+				when :closing_tag
+					if tag_name_char?(char_code)
+						current_token << char
+					elsif char == ']'
+						original_parent = current_parent
+						while current_parent.is_a?(TagNode) and current_parent.tag_name != current_token[2..-1]
+							current_parent = current_parent.parent
+						end
+						if current_parent.is_a?(TagNode)
+							current_parent = current_parent.parent
+						else # current_parent is a RootNode
+							# we made it to the top of the tree, and never found the tag to close
+							# so we'll just ignore the closing tag altogether
+							current_parent = original_parent
+						end
+						current_token_type = :unknown
+						current_token = ''
+					elsif char == "\r" or char == "\n"
+						current_parent << TextNode.new(current_parent, current_token)
+						current_token = char
+						current_token_type = :break
+					else
+						current_token_type = :text
+						current_token << char
+					end
+				end
+			end
+			# Handle whatever's left in the current token
+			if current_token_type != :break and !current_token.empty?
+				current_parent << TextNode.new(current_parent, current_token)
+			end
+			tree
+		end
+		def tag_name_char?(char_code)
+			(char_code >= LOWER_A_CODE and char_code <= LOWER_Z_CODE) or (char_code >= UPPER_A_CODE and char_code <= UPPER_Z_CODE) or char_code.chr == '*'
+		end
+	end
+end

data/lib/rbbcode.rb ADDED Viewed

@@ -0,0 +1,6 @@
+$: << File.expand_path(File.dirname(__FILE__))
+require 'rbbcode/parser'
+require 'rbbcode/schema'
+require 'rbbcode/tree_maker'
+require 'rbbcode/html_maker'

data/rbbcode.gemspec ADDED Viewed

@@ -0,0 +1,55 @@
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{rbbcode}
+  s.version = "0.1.0"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Jarrett Colby"]
+  s.date = %q{2009-05-31}
+  s.description = %q{RbbCode is a customizable Ruby library for parsing BB Code. RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.}
+  s.email = %q{jarrett@jarrettcolby.com}
+  s.extra_rdoc_files = [
+    "README"
+  ]
+  s.files = [
+    "MIT-LICENSE",
+     "README",
+     "VERSION",
+     "lib/rbbcode.rb",
+     "lib/rbbcode/html_maker.rb",
+     "lib/rbbcode/parser.rb",
+     "lib/rbbcode/schema.rb",
+     "lib/rbbcode/tree_maker.rb",
+     "rbbcode.gemspec",
+     "spec/html_maker_spec.rb",
+     "spec/node_spec_helper.rb",
+     "spec/parser_spec.rb",
+     "spec/schema_spec.rb",
+     "spec/spec_helper.rb",
+     "spec/tree_maker_spec.rb"
+  ]
+  s.homepage = %q{http://github.com/jarrett/rbbcode}
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.require_paths = ["lib"]
+  s.rubygems_version = %q{1.3.3}
+  s.summary = %q{Ruby BB Code parser}
+  s.test_files = [
+    "spec/html_maker_spec.rb",
+     "spec/node_spec_helper.rb",
+     "spec/parser_spec.rb",
+     "spec/schema_spec.rb",
+     "spec/spec_helper.rb",
+     "spec/tree_maker_spec.rb"
+  ]
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+    else
+    end
+  else
+  end
+end

data/spec/html_maker_spec.rb ADDED Viewed

@@ -0,0 +1,70 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+require File.expand_path(File.dirname(__FILE__) + '/node_spec_helper')
+# Specs for RbbCode::HtmlMaker. The node trees are built by hand with
+# NodeBuilder instead of being parsed, so only HTML generation is exercised.
+describe RbbCode::HtmlMaker do
+	context '#make_html' do
+		# Builds a node tree from the block and checks the HTML made from it.
+		def expect_html(expected_html, &block)
+			@html_maker.make_html(NodeBuilder.build(&block)).should == expected_html
+		end
+		before :each do
+			@html_maker = RbbCode::HtmlMaker.new
+		end
+		it 'should replace simple BB code tags with HTML tags' do
+			expect_html('<p>This is <strong>bold</strong> text</p>') do
+				tag('p') do
+					text 'This is '
+					tag('b') { text 'bold' }
+					text ' text'
+				end
+			end
+		end
+		it 'should work for nested tags' do
+			expect_html('<p>This is <strong>bold and <u>underlined</u></strong> text</p>') do
+				tag('p') do
+					text 'This is '
+					tag('b') do
+						text 'bold and '
+						tag('u') { text 'underlined' }
+					end
+					text ' text'
+				end
+			end
+		end
+		it 'should not allow JavaScript in URLs' do
+			# Keys are hostile inputs; values are the sanitized URLs expected in
+			# the generated HTML.
+			urls = {
+				'javascript:alert("foo");' => 'http://javascript%3Aalert("foo");',
+				'j a v a script:alert("foo");' => 'http://j+a+v+a+script%3Aalert("foo");',
+				' javascript:alert("foo");' => 'http://+javascript%3Aalert("foo");',
+				'JavaScript:alert("foo");' => 'http://JavaScript%3Aalert("foo");' ,
+				"java\nscript:alert(\"foo\");" => 'http://java%0Ascript%3Aalert("foo");',
+				"java\rscript:alert(\"foo\");" => 'http://java%0Dscript%3Aalert("foo");'
+			}
+			# url tag
+			urls.each do |evil_url, clean_url|
+				expect_html("<p><a href=\"#{clean_url}\">foo</a></p>") do
+					tag('p') do
+						tag('url', evil_url) do
+							text 'foo'
+						end
+					end
+				end
+			end
+			# img tag
+			urls.each do |evil_url, clean_url|
+				expect_html("<p><img src=\"#{clean_url}\" alt=\"\"/></p>") do
+					tag('p') do
+						tag('img') do
+							text evil_url
+						end
+					end
+				end
+			end
+		end
+	end
+end

data/spec/node_spec_helper.rb ADDED Viewed

@@ -0,0 +1,114 @@
+module RbbCode
+	# Spec-only additions to RootNode: structural equality and a printable dump.
+	class RootNode
+		# Two root nodes are equal when they have the same class and equal children.
+		def == (other_node)
+			self.class == other_node.class and self.children == other_node.children
+		end
+		# Returns an indented, multi-line text rendering of the whole tree.
+		def print_tree(indent = 0)
+			output = ''
+			indent.times { output << "  " }
+			output << 'ROOT'
+			children.each do |child|
+				output << "\n" << child.print_tree(indent + 1)
+			end
+			output << "\n/ROOT"
+			output
+		end
+	end
+	# Spec-only additions to TagNode: structural equality and a printable dump.
+	class TagNode
+		# Two tag nodes are equal when class, tag name, value, and children all
+		# match. The parent is not compared.
+		def == (other_node)
+			self.class == other_node.class and self.tag_name == other_node.tag_name and self.value == other_node.value and self.children == other_node.children
+		end
+		# Returns this tag and its children as indented text, one node per line,
+		# bracketed by the opening and closing tag.
+		def print_tree(indent = 0)
+			output = ''
+			indent.times { output << "  " }
+			if value.nil?
+				output << "[#{tag_name}]"
+			else
+				output << "[#{tag_name}=#{value}]"
+			end
+			children.each do |child|
+				output << "\n" << child.print_tree(indent + 1)
+			end
+			output << "\n"
+			indent.times { output << "  " }
+			output << "[/#{tag_name}]"
+			output
+		end
+	end
+	# Spec-only additions to TextNode: structural equality and a printable dump.
+	class TextNode
+		# Two text nodes are equal when they have the same class and text.
+		def == (other_node)
+			self.class == other_node.class and self.text == other_node.text
+		end
+		# Returns the text in double quotes, indented by two spaces per level.
+		def print_tree(indent = 0)
+			output = ''
+			indent.times { output << "  " }
+			output << '"' << text << '"'
+		end
+	end
+end
+# Small DSL for writing expected node trees in specs. The block given to
+# NodeBuilder.build is evaluated against a builder instance, so tag and text
+# can be called without a receiver; nesting tag blocks nests the nodes.
+class NodeBuilder
+	include RbbCode
+	# Evaluates the block against a new builder and returns the RootNode built.
+	def self.build(&block)
+		builder = new
+		builder.instance_eval(&block)
+		builder.root
+	end
+	attr_reader :root
+	protected
+	# Appends node to the children of the node currently being built.
+	def << (node)
+		@current_parent.children << node
+	end
+	def initialize
+		@root = RootNode.new
+		@current_parent = @root
+	end
+	# Adds a text node under the current parent. The block parameter is not used.
+	def text(contents, &block)
+		self << TextNode.new(@current_parent, contents)
+	end
+	# Adds a tag node under the current parent and evaluates the block with that
+	# tag as the current parent, restoring the previous parent afterwards.
+	def tag(tag_name, value = nil, &block)
+		tag_node = TagNode.new(@current_parent, tag_name, value)
+		self << tag_node
+		original_parent = @current_parent
+		@current_parent = tag_node
+		instance_eval(&block)
+		@current_parent = original_parent
+	end
+end
+# Matcher for comparing node trees in specs. Include this module and write
+# actual_tree.should match_node(expected_tree).
+module NodeMatchers
+	class MatchNode
+		def initialize(expected_tree)
+			@expected_tree = expected_tree
+		end
+		# Remembers target for the failure message and compares it with the
+		# expected tree using ==.
+		def matches?(target)
+			@target = target
+			@target == @expected_tree
+		end
+		# Message that prints both trees with print_tree.
+		def failure_message
+			"Expected:\n\n#{@expected_tree.print_tree}\n\nbut got:\n\n#{@target.print_tree}"
+		end
+		def negative_failure_message
+			"Expected anything other than:\n\n#{@expected_tree.print_tree}"
+		end
+	end
+	# Returns a matcher that passes when the actual tree equals expected_node.
+	def match_node(expected_node)
+		MatchNode.new(expected_node)
+	end
+end

data/spec/parser_spec.rb ADDED Viewed

@@ -0,0 +1,75 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+# End-to-end specs: BB Code strings are passed to RbbCode::Parser#parse with
+# the default configuration and the result is compared with the expected HTML.
+describe RbbCode::Parser do
+	context '#parse_bb_code' do
+		before :each do
+			@parser = RbbCode::Parser.new
+		end
+		it 'should create paragraphs and line breaks' do
+			bb_code = "This is one paragraph.\n\nThis is another paragraph."
+			@parser.parse(bb_code).should == '<p>This is one paragraph.</p><p>This is another paragraph.</p>'
+			bb_code = "This is one line.\nThis is another line."
+			@parser.parse(bb_code).should == '<p>This is one line.<br/>This is another line.</p>'
+		end
+		it 'should turn [b] to <strong>' do
+			@parser.parse('This is [b]bold[/b] text').should == '<p>This is <strong>bold</strong> text</p>'
+		end
+		it 'should turn [i] to <em> by default' do
+			@parser.parse('This is [i]italic[/i] text').should == '<p>This is <em>italic</em> text</p>'
+		end
+		it 'should turn [u] to <u>' do
+			@parser.parse('This is [u]underlined[/u] text').should == '<p>This is <u>underlined</u> text</p>'
+		end
+		it 'should turn [url]http://google.com[/url] to a link' do
+			@parser.parse('Visit [url]http://google.com[/url] now').should == '<p>Visit <a href="http://google.com">http://google.com</a> now</p>'
+		end
+		it 'should turn [url=http://google.com]Google[/url] to a link' do
+			@parser.parse('Visit [url=http://google.com]Google[/url] now').should == '<p>Visit <a href="http://google.com">Google</a> now</p>'
+		end
+		it 'should turn [img] to <img>' do
+			@parser.parse('[img]http://example.com/image.jpg[/img]').should == '<p><img src="http://example.com/image.jpg" alt=""/></p>'
+		end
+		it 'should turn [code] to <code>' do
+			@parser.parse('Too bad [code]method_missing[/code] is rarely useful').should == '<p>Too bad <code>method_missing</code> is rarely useful</p>'
+		end
+		it 'should parse nested tags' do
+			@parser.parse('[b][i]This is bold-italic[/i][/b]').should == '<p><strong><em>This is bold-italic</em></strong></p>'
+		end
+		it 'should not put <p> tags around <ul> tags' do
+			@parser.parse("Text.\n\n[list]\n[*]Foo[/*]\n[*]Bar[/*]\n[/list]\n\nMore text.").should == '<p>Text.</p><ul><li>Foo</li><li>Bar</li></ul><p>More text.</p>'
+		end
+		it 'should ignore forbidden or unrecognized tags' do
+			@parser.parse('There is [foo]no such thing[/foo] as a foo tag').should == '<p>There is no such thing as a foo tag</p>'
+		end
+		# Each line below feeds one kind of broken markup and pins the exact
+		# output the parser is expected to fall back to.
+		it 'should recover gracefully from malformed or improperly matched tags' do
+			@parser.parse('This [i/]tag[/i] is malformed').should == '<p>This [i/]tag is malformed</p>'
+			@parser.parse('This [i]]tag[/i] is malformed').should == '<p>This <em>]tag</em> is malformed</p>'
+			@parser.parse('This [i]tag[[/i] is malformed').should == '<p>This <em>tag[</em> is malformed</p>'
+			@parser.parse('This [i]tag[//i] is malformed').should == '<p>This <em>tag[//i] is malformed</em></p>'
+			@parser.parse('This [[i]tag[/i] is malformed').should == '<p>This [<em>tag</em> is malformed</p>'
+			@parser.parse('This [i]tag[/i]] is malformed').should == '<p>This <em>tag</em>] is malformed</p>'
+			@parser.parse('This [i]i tag[i] is not properly matched').should == '<p>This <em>i tag is not properly matched</em></p>'
+			@parser.parse('This i tag[/i] is not properly matched').should == '<p>This i tag is not properly matched</p>'
+		end
+		it 'should escape < and >' do
+			@parser.parse('This is [i]italic[/i], but this it not <i>italic</i>.').should == '<p>This is <em>italic</em>, but this it not &lt;i&gt;italic&lt;/i&gt;.</p>'
+		end
+		it 'should work when the string begins with a tag' do
+			@parser.parse('[b]This is bold[/b]').should == '<p><strong>This is bold</strong></p>'
+		end
+	end
+end

data/spec/schema_spec.rb ADDED Viewed

@@ -0,0 +1,98 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+# Specs for the rule methods of RbbCode::Schema and RbbCode::SchemaTag.
+describe RbbCode::Schema do
+	# Every example gets a fresh schema on which #clear was called and the
+	# default tags were then allowed again.
+	before :each do
+		@schema = RbbCode::Schema.new
+		@schema.clear
+		@schema.allow_tags(*RbbCode::DEFAULT_ALLOWED_TAGS)
+	end
+	it 'should allow the default tags at the top level' do
+		# Uses an untouched schema so that the default rules are in effect.
+		schema = RbbCode::Schema.new
+		[
+			'b',
+			'i',
+			'u',
+			'url',
+			'img',
+			'code',
+			'quote',
+			'list'
+		].each do |tag|
+			schema.tag(tag).valid_in_context?().should == true
+		end
+	end
+	it 'should not allow unknown tags' do
+		@schema.tag('foo').valid_in_context?().should == false
+	end
+	it 'should return a new SchemaTag object when tag is called' do
+		@schema.tag('b').should be_a(RbbCode::SchemaTag)
+	end
+	it 'should not allow nesting a tag when may_not_be_nested is called on it' do
+		@schema.tag('b').may_not_be_nested
+		@schema.tag('b').valid_in_context?('b').should == false
+	end
+	it 'should allow nesting a tag when may_be_nested is called on it' do
+		@schema.tag('b').may_not_be_nested
+		@schema.tag('b').may_be_nested
+		@schema.tag('b').valid_in_context?('b').should == true
+	end
+	it 'should not allow a tag to descend from another when forbidden by may_not_descend_from' do
+		@schema.tag('b').may_not_descend_from('u')
+		@schema.tag('b').valid_in_context?('u').should == false
+	end
+	it 'should allow a tag to descend from another when permitted by may_descend_from' do
+		@schema.tag('b').may_not_descend_from('u')
+		@schema.tag('b').may_descend_from('u')
+		@schema.tag('b').valid_in_context?('u').should == true
+	end
+	it 'should not allow a tag to descend from anything other than the tags specified in must_be_child_of' do
+		@schema.tag('b').must_be_child_of('u', 'quote')
+		@schema.tag('b').valid_in_context?('i').should == false
+		@schema.tag('b').valid_in_context?('u').should == true
+		@schema.tag('b').valid_in_context?('quote').should == true
+	end
+	it 'should allow a tag to descend from the one specified in must_be_child_of' do
+		@schema.tag('b').may_not_descend_from('u')
+		@schema.tag('b').must_be_child_of('u')
+		@schema.tag('b').valid_in_context?('u').should == true
+	end
+	it 'should not require a tag to be a child of another when need_not_be_child_of is called' do
+		@schema.tag('b').must_be_child_of('u')
+		@schema.tag('b').need_not_be_child_of('u')
+		@schema.tag('b').valid_in_context?('i').should == true
+	end
+	it 'should allow only the specified tag as a child when may_only_be_parent_of is called' do
+		@schema.tag('list').may_only_be_parent_of('*')
+		@schema.tag('*').valid_in_context?('list').should == true
+		@schema.tag('u').valid_in_context?('list').should == false
+		# The restriction applies to direct children only.
+		@schema.tag('u').valid_in_context?('*', 'list').should == true
+	end
+	it 'should not allow text inside a tag when may_not_contain_text is called' do
+		@schema.tag('list').may_not_contain_text
+		@schema.text.valid_in_context?('list').should == false
+	end
+	it 'should allow text inside a tag when may_contain_text is called' do
+		@schema.tag('list').may_not_contain_text
+		@schema.tag('list').may_contain_text
+		@schema.text.valid_in_context?('list').should == true
+	end
+	it 'should not allow text or children when must_be_empty is called' do
+		@schema.tag('br').must_be_empty
+		@schema.text.valid_in_context?('br').should == false
+		@schema.tag('b').valid_in_context?('br').should == false
+	end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'rubygems'
+require 'test/unit'
+require 'spec'
+def puts(foo) # redefines puts at the top level: a receiver-less call with one argument raises instead of printing
+	raise 'puts called' # presumably a guard against stray debug output in the specs -- TODO confirm intent
+end
+require File.expand_path(File.dirname(__FILE__) + '/../lib/rbbcode')

data/spec/tree_maker_spec.rb ADDED Viewed

@@ -0,0 +1,107 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+require File.expand_path(File.dirname(__FILE__) + '/node_spec_helper')
+require 'pp'
+describe RbbCode::TreeMaker do
+	include NodeMatchers
+	context '#make_tree' do
+		def expect_tree(str, &block)
+			expected = NodeBuilder.build(&block)
+			@tree_maker.make_tree(str).should match_node(expected)
+		end
+		before :each do
+			@schema = RbbCode::Schema.new
+			@tree_maker = RbbCode::TreeMaker.new(@schema)
+		end
+		it 'should make a tree from a string with one tag' do
+			str = 'This is [b]bold[/b] text'
+			expect_tree(str) do
+				tag('p') do
+					text 'This is '
+					tag('b') { text 'bold' }
+					text ' text'
+				end
+			end
+		end
+		it 'should ignore tags that are invalid in their context' do
+			@schema.tag('u').may_not_descend_from('b')
+			str = 'This is [b]bold and [u]underlined[/u][/b] text'
+			expect_tree(str) do
+				tag('p') do
+					text 'This is '
+					tag('b') do
+						text 'bold and '
+						text 'underlined'
+					end
+					text ' text'
+				end
+			end
+		end
+		it 'should create paragraph tags' do
+			str = "This is a paragraph.\n\nThis is another."
+			expect_tree(str) do
+				tag('p') do
+					text 'This is a paragraph.'
+				end
+				tag('p') do
+					text 'This is another.'
+				end
+			end
+		end
+		it 'should not put block-level elements inside paragraph tags' do
+			str = "This is a list:\n\n[list]\n\n[*]Foo[/i]\n\n[/list]\n\nwith some text after it"
+			expect_tree(str) do
+				tag('p') do
+					text 'This is a list:'
+				end
+				tag('list') do
+					tag('*') { text 'Foo' }
+				end
+				tag('p') do
+					text 'with some text after it'
+				end
+			end
+		end
+		it 'should not insert br tags in the midst of block-level elements' do
+			str = "List:\n[list]\n[*]Foo[/*]\n[*]Bar[/*]\n[/list]\nText after list"
+			expect_tree(str) do
+				tag('p') do
+					text 'List:'
+				end
+				tag('list') do
+					tag('*') { text 'Foo' }
+					tag('*') { text 'Bar' }
+				end
+				tag('p') do
+					text 'Text after list'
+				end
+			end
+		end
+		it 'should store tag values' do
+			str = 'This is a [url=http://google.com]link[/url]'
+			expect_tree(str) do
+				tag('p') do
+					text 'This is a '
+					tag('url', 'http://google.com') do
+						text 'link'
+					end
+				end
+			end
+		end
+	end
+end

metadata ADDED Viewed

@@ -0,0 +1,72 @@
+--- !ruby/object:Gem::Specification
+name: jarrett-rbbcode
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Jarrett Colby
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2009-05-31 00:00:00 -07:00
+default_executable:
+dependencies: []
+description: RbbCode is a customizable Ruby library for parsing BB Code. RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.
+email: jarrett@jarrettcolby.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- README
+files:
+- MIT-LICENSE
+- README
+- VERSION
+- lib/rbbcode.rb
+- lib/rbbcode/html_maker.rb
+- lib/rbbcode/parser.rb
+- lib/rbbcode/schema.rb
+- lib/rbbcode/tree_maker.rb
+- rbbcode.gemspec
+- spec/html_maker_spec.rb
+- spec/node_spec_helper.rb
+- spec/parser_spec.rb
+- spec/schema_spec.rb
+- spec/spec_helper.rb
+- spec/tree_maker_spec.rb
+has_rdoc: false
+homepage: http://github.com/jarrett/rbbcode
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project:
+rubygems_version: 1.2.0
+signing_key:
+specification_version: 3
+summary: Ruby BB Code parser
+test_files:
+- spec/html_maker_spec.rb
+- spec/node_spec_helper.rb
+- spec/parser_spec.rb
+- spec/schema_spec.rb
+- spec/spec_helper.rb
+- spec/tree_maker_spec.rb