html_gen 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/html_gen.gemspec +5 -2
- data/lib/html_gen_element.rb +41 -0
- data/lib/html_gen_parser.rb +185 -0
- data/lib/html_gen_text_ele.rb +12 -0
- data/spec/html_gen_parser_spec.rb +25 -0
- metadata +6 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.3
|
data/html_gen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{html_gen}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Kasper Johansen"]
|
12
|
-
s.date = %q{2012-08-
|
12
|
+
s.date = %q{2012-08-11}
|
13
13
|
s.description = %q{A small framework for generating HTML.}
|
14
14
|
s.email = %q{k@spernj.org}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
|
|
28
28
|
"html_gen.gemspec",
|
29
29
|
"lib/html_gen.rb",
|
30
30
|
"lib/html_gen_element.rb",
|
31
|
+
"lib/html_gen_parser.rb",
|
32
|
+
"lib/html_gen_text_ele.rb",
|
33
|
+
"spec/html_gen_parser_spec.rb",
|
31
34
|
"spec/html_gen_spec.rb",
|
32
35
|
"spec/spec_helper.rb"
|
33
36
|
]
|
data/lib/html_gen_element.rb
CHANGED
@@ -40,6 +40,12 @@ class Html_gen::Element
|
|
40
40
|
# element.html #=> "<b>Te<i>s</i>t</b>"
|
41
41
|
attr_accessor :str_html
|
42
42
|
|
43
|
+
#An array holding all the sub-elements of this element.
|
44
|
+
attr_accessor :eles
|
45
|
+
|
46
|
+
#The name of the element. "a" for <a> and such.
|
47
|
+
attr_accessor :name
|
48
|
+
|
43
49
|
#You can give various arguments as shortcuts to calling the methods. You can also decide what should be used for newline and indentation.
|
44
50
|
# Html_gen::Element.new(:b, {
|
45
51
|
# :css => {"font-weight" => "bold"},
|
@@ -177,4 +183,39 @@ class Html_gen::Element
|
|
177
183
|
#Returns the string.
|
178
184
|
return str
|
179
185
|
end
|
186
|
+
|
187
|
+
#Returns the names of all sub-elements in an array.
|
188
|
+
def eles_names
  #Collects the 'name' of every sub-element, in the same order as they appear in @eles.
  #Returns a new array on every call; an element without sub-elements gives an empty array.
  @eles.map(&:name)
end
|
196
|
+
|
197
|
+
#Converts the content of the 'style'-attribute to css-hash-content.
|
198
|
+
def convert_style_to_css
  #Moves every 'key: value' declaration of the 'style'-attribute into the @css-hash.
  #The attribute is looked up under the symbol-key first and then the string-key.
  #The attribute-string itself is consumed in place, so it is empty when this method returns.
  #Raises a RuntimeError if no style is set, if a key already exists in @css or if the style cannot be parsed.
  if !@attr[:style].to_s.strip.empty?
    style = @attr[:style]
  elsif !@attr["style"].to_s.strip.empty?
    style = @attr["style"]
  else
    raise "No style set in element."
  end

  loop do
    if (match = style.match(/\A\s*(\S+?):\s*(.+?)\s*(;|\Z)/))
      #Only cut off the declaration that was just matched at the start of the string.
      #Removing every occurrence of the matched text would also eat identical text inside later declarations
      #("color: red;" is also the tail of "background-color: red;").
      style.slice!(0, match[0].length)

      key = match[1]
      val = match[2]
      raise "Such a key already exists in CSS-hash: '#{key}'." if @css.key?(key)
      @css[key] = val
    elsif style.slice!(/\A\s*\Z/)
      #Nothing but whitespace left - everything has been converted.
      break
    else
      raise "Dont know what to do with style-variable: '#{style}'."
    end
  end
end
|
180
221
|
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
#A simple, lightweight and pure-Ruby class for parsing HTML-strings into elements.
|
2
|
+
#===Examples
|
3
|
+
# doc = Html_gen::Parser.new(:str => a_html_variable)
|
4
|
+
# html_ele = doc.eles.first
|
5
|
+
# html_ele.name #=> "html"
|
6
|
+
class Html_gen::Parser
  #The minimum number of characters 'ensure_buffer' tries to keep loaded.
  BUFFER_SIZE = 16384

  #An array that holds all the parsed root-elements.
  attr_reader :eles

  #The constructor. Parses the whole input right away and stores the root-elements in 'eles'.
  #===Arguments
  # :io - An object to read the HTML from. 'gets' is called on it with a length-limit.
  # :str - A string containing the HTML. Only used if :io is not given.
  # :debug - If true, progress is written with 'puts' while parsing.
  #Raises a RuntimeError if neither :io nor :str is given or if the HTML cannot be parsed.
  def initialize(args)
    if args[:io]
      @io = args[:io]
    elsif args[:str]
      #StringIO is part of the standard library but is not loaded by default.
      require "stringio"
      @io = StringIO.new(args[:str])
    else
      raise "Dont know how to handle given arguments."
    end

    @eof = false
    @buffer = ""
    @eles = []
    @eles_t = []
    @debug = args[:debug]

    loop do
      #Throw away whitespace between root-elements, so empty input or trailing whitespace is not treated as a broken tag.
      loop do
        ensure_buffer
        @buffer.lstrip!
        break if @eof || !@buffer.empty?
      end

      #An empty buffer here means the end of the input was reached.
      break if @buffer.empty?
      parse_tag
    end
  end

  private

  #Ensures at least 16kb of data is loaded into the buffer, unless the end of the input is reached first.
  def ensure_buffer
    while @buffer.length < BUFFER_SIZE && !@eof
      str = @io.gets(BUFFER_SIZE)

      if str
        @buffer << str
      else
        @eof = true
      end
    end
  end

  #Searches for a given regex. If found the contents is removed from the buffer.
  #Returns the match-data or false if the regex did not match.
  def search(regex)
    ensure_buffer
    match = @buffer.match(regex)
    return false unless match

    #Every regex used by this class is anchored with \A, so only the first occurrence has to be removed.
    @buffer.sub!(regex, "")
    ensure_buffer
    match
  end

  #Removes the innermost open element from the stack and verifies that it is the expected one.
  def pop_element(tag_name)
    closed = @eles_t.pop
    raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed.name}'." if closed.name.to_s != tag_name
    closed
  end

  #Returns true if the element has a non-empty 'style'-attribute that can be converted to CSS.
  def style_given?(ele)
    !ele.attr["style"].to_s.strip.empty? || !ele.attr[:style].to_s.strip.empty?
  end

  #Assumes a tag is the next to be parsed and adds it to document-data.
  #Returns the new element. If no tag is next, false is returned when ':false => true' is given - otherwise an error is raised.
  def parse_tag(args = {})
    match = search(/\A\s*<\s*(\/|)\s*(\S+?)(\s+|\/\s*>|>)/)

    unless match
      return false if args[:false]
      raise "Dont know what to do with buffer: '#{@buffer}'."
    end

    tag_name = match[2].to_s.strip.downcase
    start_sign = match[1].to_s.strip.downcase
    end_sign = match[3].to_s.strip.downcase

    #A start-sign means an end-tag ('</b>') that nothing was waiting for.
    raise "Dont know how to handle start-sign: '#{start_sign}' for tag: '#{tag_name}'." if !start_sign.empty?

    ele = Html_gen::Element.new(tag_name)

    if @eles_t.empty?
      puts "Adding element '#{tag_name}' to root elements." if @debug
      @eles << ele
    else
      puts "Adding element '#{tag_name}' to last t-element: '#{@eles_t.last.name}'." if @debug
      @eles_t.last.eles << ele
    end

    #The element stays on the stack until its end is found.
    @eles_t << ele
    puts "New element-match: #{match.to_a}" if @debug

    if end_sign.match(/^\/\s*>$/)
      #Self-closing tag without attributes ('<br/>').
      puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
      pop_element(tag_name)
    elsif end_sign.empty?
      #The tag-name was followed by whitespace - attributes (or the end of the tag) come next.
      parse_attr_of_tag(ele, tag_name)

      #An empty style-attribute has nothing to convert and would make the conversion raise.
      ele.convert_style_to_css if style_given?(ele)
    else
      parse_content_of_tag(ele, tag_name)
    end

    ele
  end

  #Parses the attributes of the given element until the end of its start-tag and then continues with its content.
  def parse_attr_of_tag(ele, tag_name)
    loop do
      #The end of the tag is tested before attributes, so text-content like '>a=b' is never read as an attribute.
      if search(/\A\s*\/\s*>/)
        #Self-closing tag with whitespace or attributes before the end ('<br />').
        puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
        pop_element(tag_name)
        break
      elsif search(/\A\s*>/)
        parse_content_of_tag(ele, tag_name)
        break
      elsif (match = search(/\A\s*(\S+)=(\"|'|)/))
        attr_name = match[1]
        raise "Attribute already exists on element: '#{attr_name}'." if ele.attr.key?(attr_name)

        if match[2].to_s.empty?
          #Unquoted value - it runs until the next whitespace.
          quote_char = /\s+/
          quote_val = :whitespace
        else
          quote_char = /#{Regexp.escape(match[2])}/
          quote_val = :normal
        end

        attr_val = parse_attr_until_quote(quote_char, quote_val)

        puts "Parsed attribute '#{attr_name}' with value '#{attr_val}'." if @debug
        ele.attr[attr_name] = attr_val
      else
        raise "Dont know what to do with buffer when parsing attributes: '#{@buffer}'."
      end
    end
  end

  #Reads an attribute-value from the buffer until the given quote-char and returns it.
  #For unquoted values (quote_val is :whitespace) a '>' also ends the value and is left in the buffer.
  def parse_attr_until_quote(quote_char, quote_val)
    val = ""

    loop do
      ensure_buffer
      char = @buffer.slice!(0)
      break if !char

      if char == "\\"
        #Keep the backslash and the char it escapes, so an escaped quote does not end the value.
        val << char
        escaped = @buffer.slice!(0)
        val << escaped if escaped
      elsif char =~ quote_char
        break
      elsif char == ">" && quote_val == :whitespace
        @buffer = char + @buffer
        break
      else
        val << char
      end
    end

    val
  end

  #Assumes some content of a tag is next to be parsed and parses it.
  #Sub-elements and text are added until the end-tag of the given element is found.
  def parse_content_of_tag(ele, tag_name)
    raise "Empty tag-name given: '#{tag_name}'." if tag_name.to_s.strip.empty?
    raise "No 'ele' was given." if !ele

    loop do
      if search(/\A\s*\Z/)
        raise "Could not find end of tag: '#{tag_name}'."
      elsif (match = search(/\A\s*<\s*\/\s*#{Regexp.escape(tag_name)}\s*>\s*/i))
        puts "Found end: '#{match.to_a}' for '#{@eles_t.last.name}'." if @debug
        pop_element(tag_name)
        break
      elsif (new_ele = parse_tag(:false => true))
        #'parse_tag' has already added the new element to the innermost open element.
        puts "Found new element '#{new_ele.name}' and adding it to '#{ele.name}'." if @debug
      elsif (match = search(/\A(.+?)(<|\Z)/m))
        #The 'm'-flag lets the text span several lines.
        puts "Text-content-match: '#{match.to_a}'." if @debug

        #Put end back into buffer.
        @buffer = match[2] + @buffer
        puts "Buffer after text-match: #{@buffer}" if @debug

        #Add text element to list as finished.
        ele.eles << Html_gen::Text_ele.new(:str => match[1])
      else
        raise "Dont know what to do with buffer: '#{@buffer}'."
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "Parser" do
  #Nested elements: the root-element, its sub-elements and the name of a sub-sub-element.
  it "should be able to generate elements from HTML" do
    parser = Html_gen::Parser.new(:str => "<html><head><title>Test</title></head><body>This is the body</body></html>")
    raise "Expected 1 root element but got: '#{parser.eles.length}'." if parser.eles.length != 1

    html = parser.eles.first
    raise "Expected 2 elements of HTML element but got: '#{html.eles.length}'. #{html.eles_names}" if html.eles.length != 2

    head = html.eles.first
    title = head.eles.first
    raise "Expected name to be 'title' but it wasnt: '#{title.name}'." if title.name != "title"
  end

  #Attributes: double-quoted, single-quoted and unquoted values plus conversion of 'style' to CSS.
  #Kept as its own example, so a failure in element-parsing does not hide a failure in attribute-parsing.
  it "should be able to parse attributes and convert style to CSS" do
    doc = Html_gen::Parser.new(:str => "<td colspan=\"2\" style=\"font-weight: bold;\" width='100px' height=50px>test</td>")
    td = doc.eles.first

    raise "Expected name of element to be 'td' but it wasnt: '#{td.name}'." if td.name != "td"
    raise "Expected colspan to be '2' but it wasnt: '#{td.attr["colspan"]}'." if td.attr["colspan"] != "2"
    raise "Expected width to be '100px' but it wasnt: '#{td.attr["width"]}'." if td.attr["width"] != "100px"
    raise "Expected height to be '50px' but it wasnt: '#{td.attr["height"]}'." if td.attr["height"] != "50px"
    raise "Expected CSS-font-weight to be 'bold' but it wasnt: '#{td.css["font-weight"]}'." if td.css["font-weight"] != "bold"
    raise "Expected style to be empty but it wasnt: '#{td.attr["style"]}'." if !td.attr["style"].to_s.empty?
  end
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: html_gen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.1
|
5
|
+
version: 0.0.3
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Kasper Johansen
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-08-
|
13
|
+
date: 2012-08-11 00:00:00 +02:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -78,6 +78,9 @@ files:
|
|
78
78
|
- html_gen.gemspec
|
79
79
|
- lib/html_gen.rb
|
80
80
|
- lib/html_gen_element.rb
|
81
|
+
- lib/html_gen_parser.rb
|
82
|
+
- lib/html_gen_text_ele.rb
|
83
|
+
- spec/html_gen_parser_spec.rb
|
81
84
|
- spec/html_gen_spec.rb
|
82
85
|
- spec/spec_helper.rb
|
83
86
|
has_rdoc: true
|
@@ -94,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
97
|
requirements:
|
95
98
|
- - ">="
|
96
99
|
- !ruby/object:Gem::Version
|
97
|
-
hash:
|
100
|
+
hash: 4461972116095674458
|
98
101
|
segments:
|
99
102
|
- 0
|
100
103
|
version: "0"
|