html_gen 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/html_gen.gemspec +5 -2
- data/lib/html_gen_element.rb +41 -0
- data/lib/html_gen_parser.rb +185 -0
- data/lib/html_gen_text_ele.rb +12 -0
- data/spec/html_gen_parser_spec.rb +25 -0
- metadata +6 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.3
|
data/html_gen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{html_gen}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Kasper Johansen"]
|
12
|
-
s.date = %q{2012-08-
|
12
|
+
s.date = %q{2012-08-11}
|
13
13
|
s.description = %q{A small framework for generating HTML.}
|
14
14
|
s.email = %q{k@spernj.org}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
|
|
28
28
|
"html_gen.gemspec",
|
29
29
|
"lib/html_gen.rb",
|
30
30
|
"lib/html_gen_element.rb",
|
31
|
+
"lib/html_gen_parser.rb",
|
32
|
+
"lib/html_gen_text_ele.rb",
|
33
|
+
"spec/html_gen_parser_spec.rb",
|
31
34
|
"spec/html_gen_spec.rb",
|
32
35
|
"spec/spec_helper.rb"
|
33
36
|
]
|
data/lib/html_gen_element.rb
CHANGED
@@ -40,6 +40,12 @@ class Html_gen::Element
|
|
40
40
|
# element.html #=> "<b>Te<i>s</i>t</b>"
|
41
41
|
attr_accessor :str_html
|
42
42
|
|
43
|
+
#An array holding all the sub-elements of this element.
|
44
|
+
attr_accessor :eles
|
45
|
+
|
46
|
+
#The name of the element. "a" for <a> and such.
|
47
|
+
attr_accessor :name
|
48
|
+
|
43
49
|
#You can give various arguments as shortcuts to calling the methods. You can also decide what should be used for newline and indentation.
|
44
50
|
# Html_gen::Element.new(:b, {
|
45
51
|
# :css => {"font-weight" => "bold"},
|
@@ -177,4 +183,39 @@ class Html_gen::Element
|
|
177
183
|
#Returns the string.
|
178
184
|
return str
|
179
185
|
end
|
186
|
+
|
187
|
+
#Returns the names of all sub-elements in an array.
|
188
|
+
def eles_names
|
189
|
+
names = []
|
190
|
+
@eles.each do |ele|
|
191
|
+
names << ele.name
|
192
|
+
end
|
193
|
+
|
194
|
+
return names
|
195
|
+
end
|
196
|
+
|
197
|
+
#Converts the content of the 'style'-attribute to css-hash-content.
|
198
|
+
def convert_style_to_css
|
199
|
+
if !@attr[:style].to_s.strip.empty?
|
200
|
+
style = @attr[:style]
|
201
|
+
elsif !@attr["style"].to_s.strip.empty?
|
202
|
+
style = @attr["style"]
|
203
|
+
else
|
204
|
+
raise "No style set in element."
|
205
|
+
end
|
206
|
+
|
207
|
+
loop do
|
208
|
+
if match = style.match(/\A\s*(\S+?):\s*(.+?)\s*(;|\Z)/)
|
209
|
+
style.gsub!(match[0], "")
|
210
|
+
key = match[1]
|
211
|
+
val = match[2]
|
212
|
+
raise "Such a key already exists in CSS-hash: '#{key}'." if @css.key?(key)
|
213
|
+
@css[key] = val
|
214
|
+
elsif match = style.slice!(/\A\s*\Z/)
|
215
|
+
break
|
216
|
+
else
|
217
|
+
raise "Dont know what to do with style-variable: '#{style}'."
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
180
221
|
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
#A simple, lightweight and pure-Ruby class for parsing HTML-strings into elements.
|
2
|
+
#===Examples
|
3
|
+
# doc = Html_gen::Parser.new(:str => a_html_variable)
|
4
|
+
# html_ele = doc.eles.first
|
5
|
+
# html_ele.name #=> "html"
|
6
|
+
class Html_gen::Parser
|
7
|
+
#An array that holds all the parsed root-elements.
|
8
|
+
attr_reader :eles
|
9
|
+
|
10
|
+
#The constructor. See class documentation for usage of this.
|
11
|
+
def initialize(args)
|
12
|
+
if args[:io]
|
13
|
+
@io = args[:io]
|
14
|
+
elsif args[:str]
|
15
|
+
@io = StringIO.new(args[:str])
|
16
|
+
else
|
17
|
+
raise "Dont know how to handle given arguments."
|
18
|
+
end
|
19
|
+
|
20
|
+
raise "No ':io' was given." if !@io
|
21
|
+
@eof = false
|
22
|
+
@buffer = ""
|
23
|
+
@eles = []
|
24
|
+
@eles_t = []
|
25
|
+
@debug = args[:debug]
|
26
|
+
|
27
|
+
while !@eof or !@buffer.empty?
|
28
|
+
parse_tag
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
#Ensures at least 16kb of data is loaded into the buffer.
|
35
|
+
def ensure_buffer
|
36
|
+
while @buffer.length < 16384 and !@eof
|
37
|
+
str = @io.gets(16384)
|
38
|
+
if !str
|
39
|
+
@eof = true
|
40
|
+
else
|
41
|
+
@buffer << str
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
#Searches for a given regex. If found, the matched content is removed from the buffer.
|
47
|
+
def search(regex)
|
48
|
+
ensure_buffer
|
49
|
+
|
50
|
+
if match = @buffer.match(regex)
|
51
|
+
@buffer.gsub!(regex, "")
|
52
|
+
ensure_buffer
|
53
|
+
return match
|
54
|
+
end
|
55
|
+
|
56
|
+
return false
|
57
|
+
end
|
58
|
+
|
59
|
+
#Assumes a tag is the next to be parsed and adds it to document-data.
|
60
|
+
def parse_tag(args = {})
|
61
|
+
if match = search(/\A\s*<\s*(\/|)\s*(\S+?)(\s+|\/\s*>|>)/)
|
62
|
+
tag_name = match[2].to_s.strip.downcase
|
63
|
+
start_sign = match[1].to_s.strip.downcase
|
64
|
+
end_sign = match[3].to_s.strip.downcase
|
65
|
+
|
66
|
+
raise "Dont know how to handle start-sign: '#{start_sign}' for tag: '#{tag_name}'." if !start_sign.empty?
|
67
|
+
|
68
|
+
ele = Html_gen::Element.new(tag_name)
|
69
|
+
|
70
|
+
if @eles_t.empty?
|
71
|
+
puts "Adding element '#{tag_name}' to root elements." if @debug
|
72
|
+
@eles << ele
|
73
|
+
else
|
74
|
+
puts "Adding element '#{tag_name}' to last t-element: '#{@eles_t.last.name}'." if @debug
|
75
|
+
@eles_t.last.eles << ele
|
76
|
+
end
|
77
|
+
|
78
|
+
@eles_t << ele
|
79
|
+
puts "New element-match: #{match.to_a}" if @debug
|
80
|
+
|
81
|
+
if end_sign.match(/^\/\s*>$/)
|
82
|
+
puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
|
83
|
+
ele = @eles_t.pop
|
84
|
+
raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{ele.name}'." if ele.name.to_s != tag_name
|
85
|
+
return ele
|
86
|
+
elsif end_sign.to_s.strip.empty?
|
87
|
+
parse_attr_of_tag(ele, tag_name)
|
88
|
+
ele.convert_style_to_css if ele.attr.key?("style") or ele.attr.key?(:style)
|
89
|
+
return ele
|
90
|
+
else
|
91
|
+
parse_content_of_tag(ele, tag_name)
|
92
|
+
return ele
|
93
|
+
end
|
94
|
+
else
|
95
|
+
if args[:false]
|
96
|
+
return false
|
97
|
+
else
|
98
|
+
raise "Dont know what to do with buffer: '#{@buffer}'."
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def parse_attr_of_tag(ele, tag_name)
|
104
|
+
loop do
|
105
|
+
if match = search(/\A\s*(\S+)=(\"|'|)/)
|
106
|
+
attr_name = match[1]
|
107
|
+
raise "Attribute already exists on element: '#{attr_name}'." if ele.attr.key?(attr_name)
|
108
|
+
|
109
|
+
if match[2].to_s.empty?
|
110
|
+
quote_char = /\s+/
|
111
|
+
quote_val = :whitespace
|
112
|
+
else
|
113
|
+
quote_char = /#{Regexp.escape(match[2])}/
|
114
|
+
quote_val = :normal
|
115
|
+
end
|
116
|
+
|
117
|
+
attr_val = parse_attr_until_quote(quote_char, quote_val)
|
118
|
+
|
119
|
+
puts "Parsed attribute '#{attr_name}' with value '#{attr_val}'." if @debug
|
120
|
+
ele.attr[attr_name] = attr_val
|
121
|
+
elsif search(/\A\s*>/)
|
122
|
+
parse_content_of_tag(ele, tag_name)
|
123
|
+
break
|
124
|
+
else
|
125
|
+
raise "Dont know what to do with buffer when parsing attributes: '#{@buffer}'."
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def parse_attr_until_quote(quote_char, quote_val)
|
131
|
+
val = ""
|
132
|
+
|
133
|
+
loop do
|
134
|
+
ensure_buffer
|
135
|
+
char = @buffer.slice!(0)
|
136
|
+
break if !char
|
137
|
+
|
138
|
+
if char == "\\"
|
139
|
+
val << char
|
140
|
+
val << @buffer.slice!(0)
|
141
|
+
elsif char =~ quote_char
|
142
|
+
break
|
143
|
+
elsif char == ">" and quote_val == :whitespace
|
144
|
+
@buffer = char + @buffer
|
145
|
+
break
|
146
|
+
else
|
147
|
+
val << char
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
return val
|
152
|
+
end
|
153
|
+
|
154
|
+
#Assumes some content of a tag is next to be parsed and parses it.
|
155
|
+
def parse_content_of_tag(ele, tag_name)
|
156
|
+
raise "Empty tag-name given: '#{tag_name}'." if tag_name.to_s.strip.empty?
|
157
|
+
raise "No 'ele' was given." if !ele
|
158
|
+
|
159
|
+
loop do
|
160
|
+
if search(/\A\s*\Z/)
|
161
|
+
raise "Could not find end of tag: '#{tag_name}'."
|
162
|
+
elsif match = search(/\A\s*<\s*\/\s*#{Regexp.escape(tag_name)}\s*>\s*/i)
|
163
|
+
puts "Found end: '#{match.to_a}' for '#{@eles_t.last.name}'." if @debug
|
164
|
+
ele = @eles_t.pop
|
165
|
+
raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{ele.name}'." if ele.name.to_s != tag_name
|
166
|
+
|
167
|
+
break
|
168
|
+
elsif new_ele = parse_tag(:false => true)
|
169
|
+
puts "Found new element '#{new_ele.name}' and adding it to '#{ele.name}'." if @debug
|
170
|
+
#ele.eles << new_ele
|
171
|
+
elsif match = search(/\A(.+?)(<|\Z)/)
|
172
|
+
puts "Text-content-match: '#{match.to_a}'." if @debug
|
173
|
+
|
174
|
+
#Put end back into buffer.
|
175
|
+
@buffer = match[2] + @buffer
|
176
|
+
puts "Buffer after text-match: #{@buffer}" if @debug
|
177
|
+
|
178
|
+
#Add text element to list as finished.
|
179
|
+
ele.eles << Html_gen::Text_ele.new(:str => match[1])
|
180
|
+
else
|
181
|
+
raise "Dont know what to do with buffer: '#{@buffer}'."
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "Parser" do
|
4
|
+
it "should be able generate elements from HTML" do
|
5
|
+
parser = Html_gen::Parser.new(:str => "<html><head><title>Test</title></head><body>This is the body</body></html>")
|
6
|
+
raise "Expected 1 root element but got: '#{parser.eles.length}'." if parser.eles.length != 1
|
7
|
+
|
8
|
+
html = parser.eles.first
|
9
|
+
raise "Expected 2 elements of HTML element but got: '#{html.eles.length}'. #{html.eles_names}" if html.eles.length != 2
|
10
|
+
|
11
|
+
head = html.eles.first
|
12
|
+
title = head.eles.first
|
13
|
+
raise "Expected name to be 'title' but it wasnt: '#{title.name}'." if title.name != "title"
|
14
|
+
|
15
|
+
doc = Html_gen::Parser.new(:str => "<td colspan=\"2\" style=\"font-weight: bold;\" width='100px' height=50px>test</td>")
|
16
|
+
td = doc.eles.first
|
17
|
+
|
18
|
+
raise "Expected name of element to be 'td' but it wasnt: '#{td.name}'." if td.name != "td"
|
19
|
+
raise "Expected colspan to be '2' but it wasnt: '#{td.attr["colspan"]}'." if td.attr["colspan"] != "2"
|
20
|
+
raise "Expected width to be '100px' but it wasnt: '#{td.attr["width"]}'." if td.attr["width"] != "100px"
|
21
|
+
raise "Expected height to be '50px' but it wasnt: '#{td.attr["height"]}'." if td.attr["height"] != "50px"
|
22
|
+
raise "Expected CSS-font-weight to be 'bold' but it wasnt: '#{td.css["font-weight"]}'." if td.css["font-weight"] != "bold"
|
23
|
+
raise "Expected style to be empty but it wasnt: '#{td.attr["style"]}'." if !td.attr["style"].to_s.empty?
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: html_gen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.1
|
5
|
+
version: 0.0.3
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Kasper Johansen
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-08-
|
13
|
+
date: 2012-08-11 00:00:00 +02:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -78,6 +78,9 @@ files:
|
|
78
78
|
- html_gen.gemspec
|
79
79
|
- lib/html_gen.rb
|
80
80
|
- lib/html_gen_element.rb
|
81
|
+
- lib/html_gen_parser.rb
|
82
|
+
- lib/html_gen_text_ele.rb
|
83
|
+
- spec/html_gen_parser_spec.rb
|
81
84
|
- spec/html_gen_spec.rb
|
82
85
|
- spec/spec_helper.rb
|
83
86
|
has_rdoc: true
|
@@ -94,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
97
|
requirements:
|
95
98
|
- - ">="
|
96
99
|
- !ruby/object:Gem::Version
|
97
|
-
hash:
|
100
|
+
hash: 4461972116095674458
|
98
101
|
segments:
|
99
102
|
- 0
|
100
103
|
version: "0"
|