RubyGems - rind - Versions diffs - 0.1.1 → 0.1.2 - Mend

rind 0.1.1 → 0.1.2

Files changed (3) hide show

data/CHANGELOG.rdoc CHANGED Viewed

@@ -1,5 +1,10 @@
+== 0.1.2 - 2010.06.17
+* When parsing HTML, script and style tags no longer break the parser when their content contains a "<".
+* Tag names can now contain digits. Previously, heading tags (such as h1) would not parse correctly.
 == 0.1.1 - 2010.06.14
 * Attributes without a value or with a value containing a space broke the parser.
+* Text nodes with newlines were being cleared when they should not have been.
 == 0.1.0 - 2010.06.12
 * Initial release.

data/lib/rind/parser.rb CHANGED Viewed

@@ -15,18 +15,25 @@ module Rind
 		content = File.read(file_name)
 		# tag types
-		name = /[a-zA-Z_]/
+		name = /[a-zA-Z_0-9]/
 		cdata = /<!\[CDATA\[(.*?)\]\]>/m
 		comment = /<!--(.*?)-->/m
 		doctype = /<!DOCTYPE(.*?)>/m
 		processing_instruction = /<\?(.*?)>/m
+		full_tag = /<\s*(script|style)\s*(.*?)>(.*?)<\s*\/\s*\5\s*>/m
 		end_tag = /<\s*\/\s*((?:#{name}+:)?#{name}+)\s*>/m
-		start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)?\/?>/m
+		start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)\/?\s*>/m
+		if type == 'html'
+			scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{full_tag}|#{end_tag}|#{start_tag}/o
+		else # xml
+			scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o
+		end
 		# extract tokens from the file content
 		tokens = Array.new
 		text_start = 0
-		content.scan(/#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o) do |token|
+		content.scan(scan_regex) do |token|
 			# remove nil entries from the unmatched tag checks
 			token.compact!
 			# get match object
@@ -41,17 +48,42 @@ module Rind
 			text_start = match.end(0)
 			# create a token for the appropriate tag
-			if match.begin(1) # cdata
+			if match.begin(1)
 				tokens.push([CDATA, token].flatten)
-			elsif match.begin(2) # comment
+			elsif match.begin(2)
 				tokens.push([COMMENT, token].flatten)
-			elsif match.begin(3) # doctype tag
+			elsif match.begin(3)
 				tokens.push([DOCTYPE, token].flatten)
-			elsif match.begin(4) # processing instruction
+			elsif match.begin(4)
 				tokens.push([PRO_INST, token].flatten)
-			elsif match.begin(5) # end tag
+			# from here the capture group numbers depend on the document type
+			#
+			# html => full tag = 5, end tag = 8, start tag = 9
+			# xml => end tag = 5, start tag = 6
+			elsif match.begin(5)
+				if type == 'html'
+					if token[2].nil?
+						attr = nil
+						text = token[1]
+					else
+						attr = token[1]
+						text = token[2]
+					end
+					tokens.push([START_TAG, token[0], attr])
+					if text.sub!(/\A\s*#{comment}\s*\z/o, '\1')
+						tokens.push([COMMENT, text])
+					elsif text !~ /\A\s*\z/
+						tokens.push([TEXT, text])
+					end
+					tokens.push([END_TAG, token[0]])
+				else
+					tokens.push([END_TAG, token].flatten)
+				end
+			elsif match.begin(6)
+				tokens.push([START_TAG, token].flatten)
+			elsif match.begin(8)
 				tokens.push([END_TAG, token].flatten)
-			elsif match.begin(6) # start tag
+			elsif match.begin(9)
 				tokens.push([START_TAG, token].flatten)
 			end
 		end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rind
 version: !ruby/object:Gem::Version
-  hash: 25
+  hash: 31
   prerelease: false
   segments:
   - 0
   - 1
-  - 1
-  version: 0.1.1
+  - 2
+  version: 0.1.2
 platform: ruby
 authors:
 - Aaron Lasseigne
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-06-14 00:00:00 -05:00
+date: 2010-06-17 00:00:00 -05:00
 default_executable:
 dependencies: []