RubyGems - rind - Versions diffs - 0.1.1 → 0.1.2 - Mend

rind 0.1.1 → 0.1.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

data/CHANGELOG.rdoc CHANGED Viewed

@@ -1,5 +1,10 @@
+== 0.1.2 - 2010.06.17
+* When parsing HTML, script and style tags will not break because of a "<" in their content.
+* Tag names can now have numbers. Previously heading tags would not parse correctly.
 == 0.1.1 - 2010.06.14
 * Attributes without a value or with a value containing a space broke the parser.
+* Text nodes with newlines were being cleared when they should not have been.
 == 0.1.0 - 2010.06.12
 * Initial release.

data/lib/rind/parser.rb CHANGED Viewed

@@ -15,18 +15,25 @@ module Rind
 		content = File.read(file_name)
 		# tag types
-		name = /[a-zA-Z_]/
+		name = /[a-zA-Z_0-9]/
 		cdata = /<!\[CDATA\[(.*?)\]\]>/m
 		comment = /<!--(.*?)-->/m
 		doctype = /<!DOCTYPE(.*?)>/m
 		processing_instruction = /<\?(.*?)>/m
+		full_tag = /<\s*(script|style)\s*(.*?)>(.*?)<\s*\/\s*\5\s*>/m
 		end_tag = /<\s*\/\s*((?:#{name}+:)?#{name}+)\s*>/m
-		start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)?\/?>/m
+		start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)\/?\s*>/m
+		if type == 'html'
+			scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{full_tag}|#{end_tag}|#{start_tag}/o
+		else # xml
+			scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o
+		end
 		# extract tokens from the file content
 		tokens = Array.new
 		text_start = 0
-		content.scan(/#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o) do |token|
+		content.scan(scan_regex) do |token|
 			# remove nil entries from the unmatched tag checks
 			token.compact!
 			# get match object
@@ -41,17 +48,42 @@ module Rind
 			text_start = match.end(0)
 			# create a token for the appropriate tag
-			if match.begin(1) # cdata
+			if match.begin(1)
 				tokens.push([CDATA, token].flatten)
-			elsif match.begin(2) # comment
+			elsif match.begin(2)
 				tokens.push([COMMENT, token].flatten)
-			elsif match.begin(3) # doctype tag
+			elsif match.begin(3)
 				tokens.push([DOCTYPE, token].flatten)
-			elsif match.begin(4) # processing instruction
+			elsif match.begin(4)
 				tokens.push([PRO_INST, token].flatten)
-			elsif match.begin(5) # end tag
+			# from here things vary a little
+			#
+			# html => full tag = 5, end tag = 8, start tag = 9
+			# xml => end tag = 5, start tag = 6
+			elsif match.begin(5)
+				if type == 'html'
+					if token[2].nil?
+						attr = nil
+						text = token[1]
+					else
+						attr = token[1]
+						text = token[2]
+					end
+					tokens.push([START_TAG, token[0], attr])
+					if text.sub!(/\A\s*#{comment}\s*\z/o, '\1')
+						tokens.push([COMMENT, text])
+					elsif text !~ /\A\s*\z/
+						tokens.push([TEXT, text])
+					end
+					tokens.push([END_TAG, token[0]])
+				else
+					tokens.push([END_TAG, token].flatten)
+				end
+			elsif match.begin(6)
+				tokens.push([START_TAG, token].flatten)
+			elsif match.begin(8)
 				tokens.push([END_TAG, token].flatten)
-			elsif match.begin(6) # start tag
+			elsif match.begin(9)
 				tokens.push([START_TAG, token].flatten)
 			end
 		end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rind
 version: !ruby/object:Gem::Version
-  hash: 25
+  hash: 31
   prerelease: false
   segments:
   - 0
   - 1
-  - 1
-  version: 0.1.1
+  - 2
+  version: 0.1.2
 platform: ruby
 authors:
 - Aaron Lasseigne
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-06-14 00:00:00 -05:00
+date: 2010-06-17 00:00:00 -05:00
 default_executable:
 dependencies: []