RubyGems - rbbcode - Versions diffs - 0.1.3 → 0.1.4 - Mend

rbbcode 0.1.3 → 0.1.4

Files changed (7) hide show

data/{README → README.markdown} RENAMED Viewed

@@ -1,3 +1,11 @@
+The gem is fixed!
+=================
+Due to a defective gemspec, at least one version (0.1.3) wasn't packaging the lib directory in the .gem file. This has been corrected in 0.1.4. Sorry for the delay in fixing this.
+About RbbCode
+=============
 RbbCode is a customizable Ruby library for parsing BB Code.
 RbbCode validates and cleans input. It supports customizable schemas so you can set rules about what tags are allowed where. The default rules are designed to ensure valid HTML output.
@@ -19,18 +27,18 @@ You can customize RbbCode by subclassing HtmlMaker and/or by passing configurati
 HtmlMaker can be extended by adding methods like this:
-class MyHtmlMaker < RbbCode::HtmlMaker
-  def html_from_TAGNAME_tag(node)
-    # ...
+  class MyHtmlMaker < RbbCode::HtmlMaker
+    def html_from_TAGNAME_tag(node)
+      # ...
+    end
   end
-end
 ...where TAGNAME should be replaced with the name of the tag. The method should accept an RbbCode::TagNode and return HTML as a string. (See tree_maker.rb for the definition of RbbCode::TagNode.) Anytime the parser encounters the specified tag, it will call your method and insert the returned HTML into the output.
 Now you just have to tell the Parser object to use an instance of your custom subclass instead of the default HtmlMaker:
-my_html_maker = MyHtmlMaker.new
-parser = RbbCode::Parser.new(:html_maker => my_html_maker)
+  my_html_maker = MyHtmlMaker.new
+  parser = RbbCode::Parser.new(:html_maker => my_html_maker)
 RbbCode removes invalid markup by comparing the input against a Schema object. The Schema is much like a DTD in XML. You can set your own rules and change the default ones by calling configuration methods on a Schema instance. Look at Schema#use_defaults in schema.rb for examples.

data/lib/rbbcode/html_maker.rb ADDED Viewed

@@ -0,0 +1,96 @@
+# TODO: Lists must be surrounded by </p> and <p>
+require 'cgi'
+module RbbCode
+	# Maps BB Code tag names (keys) to the HTML element names (values) used to render them.
+	# HtmlMaker#map_tag_name looks tags up here and raises if a tag has no entry.
+	DEFAULT_TAG_MAPPINGS = {
+		'p' => 'p',
+		'br' => 'br',
+		'b' => 'strong',
+		'i' => 'em',
+		'u' => 'u',
+		'code' => 'code',
+		'quote' => 'blockquote',
+		'list' => 'ul',
+		'*' => 'li'
+	}
+	class HtmlMaker
+		def make_html(node)
+			output = ''
+			case node.class.to_s
+			when 'RbbCode::RootNode'
+				node.children.each do |child|
+					output << make_html(child)
+				end
+			when 'RbbCode::TagNode'
+				custom_tag_method = "html_from_#{node.tag_name}_tag"
+				if respond_to?(custom_tag_method)
+					output << send(custom_tag_method, node)
+				else
+					inner_html = ''
+					node.children.each do |child|
+						inner_html << make_html(child)
+					end
+					output << content_tag(map_tag_name(node.tag_name), inner_html)
+				end
+			when 'RbbCode::TextNode'
+				output << node.text
+			else
+				raise "Don't know how to make HTML from #{node.class}"
+			end
+			output
+		end
+		protected
+		def content_tag(tag_name, contents, attributes = {})
+			output = "<#{tag_name}"
+			attributes.each do |attr, value|
+				output << " #{attr}=\"#{value}\""
+			end
+			if contents.nil? or contents.empty?
+				output << '/>'
+			else
+				output << ">#{contents}</#{tag_name}>"
+			end
+		end
+		def html_from_img_tag(node)
+			src = sanitize_url(node.inner_bb_code)
+			content_tag('img', nil, {'src' => src, 'alt' => ''})
+		end
+		def html_from_url_tag(node)
+			if node.value.nil?
+				url = node.inner_bb_code
+			else
+				url = node.value
+			end
+			url = sanitize_url(url)
+			inner_html = node.children.inject('') do |output, child|
+				output + make_html(child)
+			end
+			content_tag('a', inner_html, {'href' => url})
+		end
+		def map_tag_name(tag_name)
+			unless DEFAULT_TAG_MAPPINGS.has_key?(tag_name)
+				raise "No tag mapping for '#{tag_name}'"
+			end
+			DEFAULT_TAG_MAPPINGS[tag_name]
+		end
+		def sanitize_url(url)
+			# Prepend a protocol if there isn't one
+			unless url.match(/^[a-zA-Z]+:\/\//)
+				url = 'http://' + url
+			end
+			# Replace all functional permutations of "javascript:" with a hex-encoded version of the same
+			url.gsub!(/(\s*j\s*\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*):/i) do |match_str|
+				'%' + $1.unpack('H2' * $1.length).join('%').upcase + '%3A'
+			end
+			url.gsub('"', '%22')
+		end
+	end
+end

data/lib/rbbcode/parser.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module RbbCode
+	class Parser
+		def initialize(config = {})
+			config.each_key do |key|
+				raise(ArgumentError, "Unknown option #{key}") unless known_options.include?(key)
+			end
+			@config = config
+		end
+		def parse(str)
+			str = escape_html_tags(str)
+			schema = @config[:schema] || RbbCode::Schema.new
+			tree_maker = @config[:tree_maker] || RbbCode::TreeMaker.new(schema)
+			tree = tree_maker.make_tree(str)
+			html_maker = @config[:html_maker] || RbbCode::HtmlMaker.new
+			html_maker.make_html(tree)
+		end
+		protected
+		def escape_html_tags(str)
+			str.gsub('<', '&lt;').gsub('>', '&gt;')
+		end
+		def known_options
+			[:schema, :tree_maker, :html_maker]
+		end
+	end
+end

data/lib/rbbcode/schema.rb ADDED Viewed

@@ -0,0 +1,256 @@
+module RbbCode
+	# Tag names a freshly created Schema allows; Schema#initialize starts from a copy of this list.
+	DEFAULT_ALLOWED_TAGS = [
+		'p',
+		'br',
+		'b',
+		'i',
+		'u',
+		'url',
+		'img',
+		'code',
+		'quote',
+		'list',
+		'*'
+	]
+	# Tag names that Schema#block_level? reports as block-level elements.
+	DEFAULT_BLOCK_LEVEL_ELEMENTS = [
+		'quote',
+		'list',
+		'*'
+	]
+	class SchemaNode
+		def initialize(schema)
+			@schema = schema
+		end
+		protected
+		def normalize_ancestors(ancestors)
+			if ancestors.length == 1 and ancestors[0].is_a?(Array)
+				ancestors = ancestors[0]
+			end
+			ancestors
+		end
+	end
+	class SchemaTag < SchemaNode
+		def initialize(schema, name)
+			@schema = schema
+			@name = name
+		end
+		def may_be_nested
+			@schema.allow_descent(@name, @name)
+			self
+		end
+		def may_contain_text
+			@schema.allow_text(@name)
+			self
+		end
+		def may_not_be_empty
+			@schema.forbid_emptiness(@name)
+		end
+		def may_not_be_nested
+			@schema.forbid_descent(@name, @name)
+			self
+		end
+		def may_descend_from(tag_name)
+			@schema.allow_descent(tag_name, @name)
+			self
+		end
+		def may_only_be_parent_of(*tag_names)
+			@schema.forbid_children_except(@name, *tag_names)
+			self
+		end
+		def may_not_contain_text
+			@schema.forbid_text(@name)
+			self
+		end
+		def may_not_descend_from(tag_name)
+			@schema.forbid_descent(tag_name, @name)
+			self
+		end
+		def must_be_child_of(*tag_names)
+			@schema.require_parents(tag_names, @name)
+			self
+		end
+		def must_be_empty
+			@schema.forbid_children_except(@name, [])
+			may_not_contain_text
+			self
+		end
+		def need_not_be_child_of(tag_name)
+			@schema.unrequire_parent(tag_name, @name)
+			self
+		end
+		# Returns true if tag_name is valid in the context defined by its list of ancestors.
+		# ancestors should be ordered from most recent ancestor to most distant.
+		def valid_in_context?(*ancestors)
+			@schema.tag_valid_in_context?(@name, normalize_ancestors(ancestors))
+		end
+	end
+	class SchemaText < SchemaNode
+		def valid_in_context?(*ancestors)
+			@schema.text_valid_in_context?(normalize_ancestors(ancestors))
+		end
+	end
+	class Schema
+		def allow_descent(ancestor, descendant) #:nodoc:
+			if @forbidden_descent.has_key?(descendant.to_s) and @forbidden_descent[descendant.to_s].include?(ancestor.to_s)
+				@forbidden_descent[descendant.to_s].delete(ancestor.to_s)
+			end
+		end
+		def allow_emptiness(tag_name)
+			@never_empty.delete(tag_name.to_s)
+		end
+		def allow_tag(*tag_names)
+			tag_names.each do |tag_name|
+				unless @allowed_tags.include?(tag_name.to_s)
+					@allowed_tags << tag_name.to_s
+				end
+			end
+		end
+		def allow_text(tag_name)
+			@no_text.delete(tag_name.to_s)
+		end
+		def block_level?(tag_name)
+			DEFAULT_BLOCK_LEVEL_ELEMENTS.include?(tag_name.to_s)
+		end
+		alias_method :allow_tags, :allow_tag
+		def clear
+			@allowed_tags = []
+			@never_empty = []
+			@forbidden_descent = {}
+			@required_parents = {}
+			@no_text = []
+		end
+		def forbid_children_except(parent, children)
+			@child_requirements[parent.to_s] = children.collect { |c| c.to_s }
+		end
+		def forbid_descent(ancestor, descendant) #:nodoc:
+			@forbidden_descent[descendant.to_s] ||= []
+			unless @forbidden_descent[descendant.to_s].include?(ancestor.to_s)
+				@forbidden_descent[descendant.to_s] << ancestor.to_s
+			end
+		end
+		def forbid_emptiness(tag_name)
+			@never_empty << tag_name.to_s unless @never_empty.include?(tag_name.to_s)
+		end
+		def forbid_tag(name)
+			@allowed_tags.delete(name.to_s)
+		end
+		def forbid_text(tag_name)
+			@no_text << tag_name.to_s unless @no_text.include?(tag_name.to_s)
+		end
+		def initialize
+			@allowed_tags = DEFAULT_ALLOWED_TAGS.dup
+			@forbidden_descent = {}
+			@required_parents = {}
+			@child_requirements = {}
+			@never_empty = []
+			@no_text = []
+			use_defaults
+		end
+		def line_break_tag_name
+			'br'
+		end
+		def paragraph_tag_name
+			'p'
+		end
+		def require_parents(parents, child) #:nodoc:
+			@required_parents[child.to_s] = parents.collect { |p| p.to_s }
+			parents.each do |parent|
+				if @forbidden_descent.has_key?(child.to_s)
+					@forbidden_descent[child.to_s].delete(parent)
+				end
+			end
+		end
+		def tag(name)
+			SchemaTag.new(self, name)
+		end
+		def tag_may_be_empty?(tag_name)
+			!@never_empty.include?(tag_name.to_s)
+		end
+		def tag_valid_in_context?(tag_name, ancestors)
+			return false unless @allowed_tags.include?(tag_name.to_s)
+			if @required_parents.has_key?(tag_name.to_s) and !@required_parents[tag_name.to_s].include?(ancestors[0].to_s)
+				return false
+			end
+			if @child_requirements.has_key?(ancestors[0].to_s) and !@child_requirements[ancestors[0].to_s].include?(tag_name.to_s)
+				return false
+			end
+			if @forbidden_descent.has_key?(tag_name.to_s)
+				@forbidden_descent[tag_name.to_s].each do |forbidden_ancestor|
+					return false if ancestors.include?(forbidden_ancestor)
+				end
+			end
+			return true
+		end
+		def text
+			SchemaText.new(self)
+		end
+		def text_valid_in_context?(*ancestors)
+			if @no_text.include?(ancestors[0].to_s)
+				return false
+			end
+			return true
+		end
+		def unrequire_parent(parent, child)
+			@required_parents.delete(child.to_s)
+		end
+		def use_defaults
+			tag('br').must_be_empty
+			tag('p').may_not_be_nested
+			tag('b').may_not_be_nested
+			tag('b').may_not_be_empty
+			tag('i').may_not_be_nested
+			tag('i').may_not_be_empty
+			tag('u').may_not_be_nested
+			tag('u').may_not_be_empty
+			tag('url').may_not_be_nested
+			tag('img').may_not_be_nested
+			tag('code').may_not_be_nested
+			tag('p').may_not_be_nested
+			tag('*').must_be_child_of('list')
+			tag('list').may_not_descend_from('p')
+			tag('list').may_only_be_parent_of('*')
+			tag('list').may_not_contain_text
+		end
+	end
+end

data/lib/rbbcode/tree_maker.rb ADDED Viewed

@@ -0,0 +1,346 @@
+require 'pp'
+module RbbCode
+	# Integer character codes for the characters the tree maker compares bytes against.
+	module CharCodes
+		# Carriage return and line feed
+		CR_CODE = 13
+		LF_CODE = 10
+		# [ and ]
+		L_BRACK_CODE = 91
+		R_BRACK_CODE = 93
+		# /
+		SLASH_CODE = 47
+		# Bounds of the ranges a-z and A-Z
+		LOWER_A_CODE = 97
+		LOWER_Z_CODE = 122
+		UPPER_A_CODE = 65
+		UPPER_Z_CODE = 90
+	end
+	class Node
+		def << (child)
+			@children << child
+		end
+		attr_accessor :children
+		def initialize(parent)
+			@parent = parent
+			@children = []
+		end
+		attr_accessor :parent
+	end
+	class RootNode < Node
+		def initialize
+			@children = []
+		end
+	end
+	class TextNode < Node
+		undef_method '<<'.to_sym
+		undef_method :children
+		def initialize(parent, text)
+			@parent = parent
+			@text = text
+		end
+		attr_accessor :text
+		def to_bb_code
+			@text
+		end
+	end
+	class TagNode < Node
+		def self.from_opening_bb_code(parent, bb_code)
+			if equal_index = bb_code.index('=')
+				tag_name = bb_code[1, equal_index - 1]
+				value = bb_code[(equal_index + 1)..-2]
+			else
+				tag_name = bb_code[1..-2]
+				value = nil
+			end
+			new(parent, tag_name, value)
+		end
+		def initialize(parent, tag_name, value = nil)
+			super(parent)
+			@tag_name = tag_name
+			@value = value
+		end
+		def inner_bb_code
+			@children.inject('') do |output, child|
+				output << child.to_bb_code
+			end
+		end
+		def to_bb_code
+			if @value.nil?
+				output = "[#{@tag_name}]"
+			else
+				output = "[#{@tag_name}=#{@value}]"
+			end
+			output << inner_bb_code << "[/#{@tag_name}]"
+		end
+		attr_reader :tag_name
+		attr_reader :value
+	end
+	class TreeMaker
+		include CharCodes
+		def initialize(schema)
+			@schema = schema
+		end
+		def make_tree(str)
+			delete_empty_paragraphs!(
+				delete_invalid_empty_tags!(
+					parse_str(str)
+				)
+			)
+		end
+		protected
+		def ancestor_list(parent)
+			ancestors = []
+			while parent.is_a?(TagNode)
+				ancestors << parent.tag_name
+				parent = parent.parent
+			end
+			ancestors
+		end
+		def break_type(break_str)
+			if break_str.length > 2
+				:paragraph
+			elsif break_str.length == 1
+				:line_break
+			elsif break_str == "\r\n"
+				:line_break
+			else
+				:paragraph
+			end
+		end
+		def delete_empty_paragraphs!(node)
+			node.children.reject! do |child|
+				if child.is_a?(TagNode)
+					if !child.children.empty?
+						delete_empty_paragraphs!(child)
+						false
+					elsif child.tag_name == @schema.paragraph_tag_name
+						# It's an empty paragraph tag, so the reject! block should return true
+						true
+					else
+						false
+					end
+				else
+					false
+				end
+			end
+			node
+		end
+		# The schema defines some tags that may not be empty. This method removes any such empty tags from the tree.
+		def delete_invalid_empty_tags!(node)
+			node.children.reject! do |child|
+				if child.is_a?(TagNode)
+					if child.children.empty? and !@schema.tag_may_be_empty?(child.tag_name)
+						true
+					else
+						delete_invalid_empty_tags!(child)
+						false
+					end
+				end
+			end
+			node
+		end
+		def parse_str(str)
+			tree = RootNode.new
+			# Initially, we open a paragraph tag. If it turns out that the first thing we encounter
+			# is a block-level element, no problem: we'll be calling promote_block_level_elements
+			# later anyway.
+			current_parent = TagNode.new(tree, @schema.paragraph_tag_name)
+			tree << current_parent
+			current_token = ''
+			current_token_type = :unknown
+			# It may seem naive to use each_byte. What about Unicode? So long as we're using UTF-8, none of the
+			# BB Code control characters will appear as part of multibyte characters, because UTF-8 doesn't allow
+			# the range 0x00-0x7F in multibyte chars. As for the multibyte characters themselves, yes, they will
+			# be temporarily split up as we append bytes onto the text nodes. But as of yet, I haven't found
+			# a way that this could cause a problem. The bytes always come back together again. (It would be a problem
+			# if we tried to count the characters for some reason, but we don't do that.)
+			str.each_byte do |char_code|
+				char = char_code.chr
+				case current_token_type
+				when :unknown
+					case char
+					when '['
+						current_token_type = :possible_tag
+						current_token << char
+					when "\r", "\n"
+						current_token_type = :break
+						current_token << char
+					else
+						if current_parent.is_a?(RootNode)
+							new_paragraph_tag = TagNode.new(current_parent, @schema.paragraph_tag_name)
+							current_parent << new_paragraph_tag
+							current_parent = new_paragraph_tag
+						end
+						current_token_type = :text
+						current_token << char
+					end
+				when :text
+					case char
+					when "["
+						if @schema.text_valid_in_context?(*ancestor_list(current_parent))
+							current_parent << TextNode.new(current_parent, current_token)
+						end
+						current_token = '['
+						current_token_type = :possible_tag
+					when "\r", "\n"
+						if @schema.text_valid_in_context?(*ancestor_list(current_parent))
+							current_parent << TextNode.new(current_parent, current_token)
+						end
+						current_token = char
+						current_token_type = :break
+					else
+						current_token << char
+					end
+				when :break
+					if char == CR_CODE or char_code == LF_CODE
+						current_token << char
+					else
+						if break_type(current_token) == :paragraph
+							while current_parent.is_a?(TagNode) and !@schema.block_level?(current_parent.tag_name) and current_parent.tag_name != @schema.paragraph_tag_name
+								current_parent = current_parent.parent
+							end
+							# The current parent might be a paragraph tag, in which case we should move up one more level.
+							# Otherwise, it might be a block-level element or a root node, in which case we should not move up.
+							if current_parent.is_a?(TagNode) and current_parent.tag_name == @schema.paragraph_tag_name
+								current_parent = current_parent.parent
+							end
+							# Regardless of whether the current parent is a block-level element, we need to open a new paragraph.
+							new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
+							current_parent << new_paragraph_node
+							current_parent = new_paragraph_node
+						else # line break
+							prev_sibling = current_parent.children.last
+							if prev_sibling.is_a?(TagNode) and @schema.block_level?(prev_sibling.tag_name)
+								# Although the input only contains a single newline, we should
+								# interpret is as the start of a new paragraph, because the last
+								# thing we encountered was a block-level element.
+								new_paragraph_node = TagNode.new(current_parent, @schema.paragraph_tag_name)
+								current_parent << new_paragraph_node
+								current_parent = new_paragraph_node
+							elsif @schema.tag(@schema.line_break_tag_name).valid_in_context?(*ancestor_list(current_parent))
+								current_parent << TagNode.new(current_parent, @schema.line_break_tag_name)
+							end
+						end
+						if char == '['
+							current_token = '['
+							current_token_type = :possible_tag
+						else
+							current_token = char
+							current_token_type = :text
+						end
+					end
+				when :possible_tag
+					case char
+					when '['
+						current_parent << TextNode.new(current_parent, '[')
+						# No need to reset current_token or current_token_type
+					when '/'
+						current_token_type = :closing_tag
+						current_token << '/'
+					else
+						if tag_name_char?(char_code)
+							current_token_type = :opening_tag
+							current_token << char
+						else
+							current_token_type = :text
+							current_token << char
+						end
+					end
+				when :opening_tag
+					if tag_name_char?(char_code) or char == '='
+						current_token << char
+					elsif char == ']'
+						current_token << ']'
+						tag_node = TagNode.from_opening_bb_code(current_parent, current_token)
+						if @schema.block_level?(tag_node.tag_name) and current_parent.tag_name == @schema.paragraph_tag_name
+							# If there is a line break before this, it's superfluous and should be deleted
+							prev_sibling = current_parent.children.last
+							if prev_sibling.is_a?(TagNode) and prev_sibling.tag_name == @schema.line_break_tag_name
+								current_parent.children.pop
+							end
+							# Promote a block-level element
+							current_parent = current_parent.parent
+							tag_node.parent = current_parent
+							current_parent << tag_node
+							current_parent = tag_node
+							# If all of this results in empty paragraph tags, no worries: they will be deleted later.
+						elsif @schema.tag(tag_node.tag_name).valid_in_context?(*ancestor_list(current_parent))
+							current_parent << tag_node
+							current_parent = tag_node
+						end # else, don't do anything--the tag is invalid and will be ignored
+						current_token_type = :unknown
+						current_token = ''
+					elsif char == "\r" or char == "\n"
+						current_parent << TextNode.new(current_parent, current_token)
+						current_token = char
+						current_token_type = :break
+					elsif current_token.include?('=')
+						current_token << char
+					else
+						current_token_type = :text
+						current_token << char
+					end
+				when :closing_tag
+					if tag_name_char?(char_code)
+						current_token << char
+					elsif char == ']'
+						original_parent = current_parent
+						while current_parent.is_a?(TagNode) and current_parent.tag_name != current_token[2..-1]
+							current_parent = current_parent.parent
+						end
+						if current_parent.is_a?(TagNode)
+							current_parent = current_parent.parent
+						else # current_parent is a RootNode
+							# we made it to the top of the tree, and never found the tag to close
+							# so we'll just ignore the closing tag altogether
+							current_parent = original_parent
+						end
+						current_token_type = :unknown
+						current_token = ''
+					elsif char == "\r" or char == "\n"
+						current_parent << TextNode.new(current_parent, current_token)
+						current_token = char
+						current_token_type = :break
+					else
+						current_token_type = :text
+						current_token << char
+					end
+				end
+			end
+			# Handle whatever's left in the current token
+			if current_token_type != :break and !current_token.empty?
+				current_parent << TextNode.new(current_parent, current_token)
+			end
+			tree
+		end
+		def tag_name_char?(char_code)
+			(char_code >= LOWER_A_CODE and char_code <= LOWER_Z_CODE) or (char_code >= UPPER_A_CODE and char_code <= UPPER_Z_CODE) or char_code.chr == '*'
+		end
+	end
+end

data/lib/rbbcode.rb ADDED Viewed

@@ -0,0 +1,6 @@
+$: << File.expand_path(File.dirname(__FILE__))
+require 'rbbcode/parser'
+require 'rbbcode/schema'
+require 'rbbcode/tree_maker'
+require 'rbbcode/html_maker'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rbbcode
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Jarrett Colby
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-12-31 00:00:00 -06:00
+date: 2010-02-17 00:00:00 -06:00
 default_executable:
 dependencies: []
@@ -20,9 +20,14 @@ executables: []
 extensions: []
 extra_rdoc_files:
-- README
+- README.markdown
 files:
-- README
+- lib/rbbcode.rb
+- lib/rbbcode/html_maker.rb
+- lib/rbbcode/parser.rb
+- lib/rbbcode/schema.rb
+- lib/rbbcode/tree_maker.rb
+- README.markdown
 has_rdoc: true
 homepage: http://github.com/jarrett/rbbcode
 licenses: []