rind 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +5 -0
- data/lib/rind/parser.rb +41 -9
- metadata +4 -4
data/CHANGELOG.rdoc
CHANGED
@@ -1,5 +1,10 @@
|
|
1
|
+
== 0.1.2 - 2010.06.17
|
2
|
+
* When parsing HTML, script and style tags will not break because of a "<" in their content.
|
3
|
+
* Tag names can now have numbers. Previously heading tags would not parse correctly.
|
4
|
+
|
1
5
|
== 0.1.1 - 2010.06.14
|
2
6
|
* Attributes without a value or with a value containing a space broke the parser.
|
7
|
+
* Text nodes with newlines were being cleared when they should not have been.
|
3
8
|
|
4
9
|
== 0.1.0 - 2010.06.12
|
5
10
|
* Initial release.
|
data/lib/rind/parser.rb
CHANGED
@@ -15,18 +15,25 @@ module Rind
|
|
15
15
|
content = File.read(file_name)
|
16
16
|
|
17
17
|
# tag types
|
18
|
-
name = /[a-zA-
|
18
|
+
name = /[a-zA-Z_0-9]/
|
19
19
|
cdata = /<!\[CDATA\[(.*?)\]\]>/m
|
20
20
|
comment = /<!--(.*?)-->/m
|
21
21
|
doctype = /<!DOCTYPE(.*?)>/m
|
22
22
|
processing_instruction = /<\?(.*?)>/m
|
23
|
+
full_tag = /<\s*(script|style)\s*(.*?)>(.*?)<\s*\/\s*\5\s*>/m
|
23
24
|
end_tag = /<\s*\/\s*((?:#{name}+:)?#{name}+)\s*>/m
|
24
|
-
start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)
|
25
|
+
start_tag = /<\s*((?:#{name}+:)?#{name}+)\s*(.*?)\/?\s*>/m
|
26
|
+
|
27
|
+
if type == 'html'
|
28
|
+
scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{full_tag}|#{end_tag}|#{start_tag}/o
|
29
|
+
else # xml
|
30
|
+
scan_regex = /#{cdata}|#{comment}|#{doctype}|#{processing_instruction}|#{end_tag}|#{start_tag}/o
|
31
|
+
end
|
25
32
|
|
26
33
|
# extract tokens from the file content
|
27
34
|
tokens = Array.new
|
28
35
|
text_start = 0
|
29
|
-
content.scan(
|
36
|
+
content.scan(scan_regex) do |token|
|
30
37
|
# remove nil entries from the unmatched tag checks
|
31
38
|
token.compact!
|
32
39
|
# get match object
|
@@ -41,17 +48,42 @@ module Rind
|
|
41
48
|
text_start = match.end(0)
|
42
49
|
|
43
50
|
# create a token for the appropriate tag
|
44
|
-
if match.begin(1)
|
51
|
+
if match.begin(1)
|
45
52
|
tokens.push([CDATA, token].flatten)
|
46
|
-
elsif match.begin(2)
|
53
|
+
elsif match.begin(2)
|
47
54
|
tokens.push([COMMENT, token].flatten)
|
48
|
-
elsif match.begin(3)
|
55
|
+
elsif match.begin(3)
|
49
56
|
tokens.push([DOCTYPE, token].flatten)
|
50
|
-
elsif match.begin(4)
|
57
|
+
elsif match.begin(4)
|
51
58
|
tokens.push([PRO_INST, token].flatten)
|
52
|
-
|
59
|
+
# from here things vary a little
|
60
|
+
#
|
61
|
+
# html => full tag = 5, end tag = 8, start tag = 9
|
62
|
+
# xml => end tag = 5, start tag = 6
|
63
|
+
elsif match.begin(5)
|
64
|
+
if type == 'html'
|
65
|
+
if token[2].nil?
|
66
|
+
attr = nil
|
67
|
+
text = token[1]
|
68
|
+
else
|
69
|
+
attr = token[1]
|
70
|
+
text = token[2]
|
71
|
+
end
|
72
|
+
tokens.push([START_TAG, token[0], attr])
|
73
|
+
if text.sub!(/\A\s*#{comment}\s*\z/o, '\1')
|
74
|
+
tokens.push([COMMENT, text])
|
75
|
+
elsif text !~ /\A\s*\z/
|
76
|
+
tokens.push([TEXT, text])
|
77
|
+
end
|
78
|
+
tokens.push([END_TAG, token[0]])
|
79
|
+
else
|
80
|
+
tokens.push([END_TAG, token].flatten)
|
81
|
+
end
|
82
|
+
elsif match.begin(6)
|
83
|
+
tokens.push([START_TAG, token].flatten)
|
84
|
+
elsif match.begin(8)
|
53
85
|
tokens.push([END_TAG, token].flatten)
|
54
|
-
elsif match.begin(6)
|
86
|
+
elsif match.begin(9)
|
55
87
|
tokens.push([START_TAG, token].flatten)
|
56
88
|
end
|
57
89
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rind
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 1
|
10
|
-
version: 0.1.1
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Aaron Lasseigne
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-06-
|
18
|
+
date: 2010-06-17 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|