html_gen 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.3
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{html_gen}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Kasper Johansen"]
12
- s.date = %q{2012-08-10}
12
+ s.date = %q{2012-08-11}
13
13
  s.description = %q{A small framework for generating HTML.}
14
14
  s.email = %q{k@spernj.org}
15
15
  s.extra_rdoc_files = [
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
28
28
  "html_gen.gemspec",
29
29
  "lib/html_gen.rb",
30
30
  "lib/html_gen_element.rb",
31
+ "lib/html_gen_parser.rb",
32
+ "lib/html_gen_text_ele.rb",
33
+ "spec/html_gen_parser_spec.rb",
31
34
  "spec/html_gen_spec.rb",
32
35
  "spec/spec_helper.rb"
33
36
  ]
@@ -40,6 +40,12 @@ class Html_gen::Element
40
40
  # element.html #=> "<b>Te<i>s</i>t</b>"
41
41
  attr_accessor :str_html
42
42
 
43
+ #An array holding all the sub-elements of this element.
44
+ attr_accessor :eles
45
+
46
+ #The name of the element. "a" for <a> and such.
47
+ attr_accessor :name
48
+
43
49
  #You can give various arguments as shortcuts to calling the methods. You can also decide what should be used for newline and indentation.
44
50
  # Html_gen::Element.new(:b, {
45
51
  # :css => {"font-weight" => "bold"},
@@ -177,4 +183,39 @@ class Html_gen::Element
177
183
  #Returns the string.
178
184
  return str
179
185
  end
186
+
187
+ #Returns the names of all sub-elements in an array.
188
def eles_names
  #Collects the 'name' of every object in @eles, in order, and returns them as a new array.
  #An empty @eles gives an empty array.
  #NOTE(review): every sub-element must respond to 'name'. Text-elements added by the parser do not appear to define it - confirm before calling this on elements holding text.
  @eles.map(&:name)
end
196
+
197
+ #Converts the content of the 'style'-attribute to css-hash-content.
198
def convert_style_to_css
  #Moves every 'key: value;' declaration from the 'style'-attribute (symbol- or string-key) into the @css-hash.
  #The attribute-string is consumed in place, so it ends up empty once everything has been converted.
  #Raises a RuntimeError if no style is set, if a key already exists in @css or if the style cannot be understood.
  if !@attr[:style].to_s.strip.empty?
    style = @attr[:style]
  elsif !@attr["style"].to_s.strip.empty?
    style = @attr["style"]
  else
    raise "No style set in element."
  end

  loop do
    if match = style.match(/\A\s*(\S+?):\s*(.+?)\s*(;|\Z)/)
      #Remove only the declaration that was just matched at the start of the string.
      #A plain string-gsub would also remove identical text later on (e.g. 'color: red;' inside 'background-color: red;').
      style.slice!(0, match[0].length)
      key = match[1]
      val = match[2]
      raise "Such a key already exists in CSS-hash: '#{key}'." if @css.key?(key)
      @css[key] = val
    elsif style.slice!(/\A\s*\Z/)
      #Only whitespace (or nothing) was left - it has been removed and we are done.
      break
    else
      raise "Dont know what to do with style-variable: '#{style}'."
    end
  end
end
180
221
  end
@@ -0,0 +1,185 @@
1
require "stringio"

#A simple, lightweight and pure-Ruby class for parsing HTML-strings into elements.
#
#The whole input is parsed by the constructor. The parsed root-elements are available through 'eles' afterwards.
#===Examples
# doc = Html_gen::Parser.new(:str => a_html_variable)
# html_ele = doc.eles.first
# html_ele.name #=> "html"
class Html_gen::Parser
  #An array that holds all the parsed root-elements.
  attr_reader :eles

  #The constructor. See class documentation for usage of this.
  #===Arguments
  # :io - An object responding to 'gets(limit)' which the HTML is read from.
  # :str - A string containing the HTML. Only used if no :io is given.
  # :debug - If true, information about the parsing is printed with 'puts'.
  #Raises a RuntimeError if neither :io nor :str is given or if the HTML cannot be understood.
  def initialize(args)
    if args[:io]
      @io = args[:io]
    elsif args[:str]
      @io = StringIO.new(args[:str])
    else
      raise "Dont know how to handle given arguments."
    end

    @eof = false
    @buffer = ""
    @eles = []

    #Stack of the elements that are currently open. The last one is the innermost.
    @eles_t = []
    @debug = args[:debug]

    loop do
      #Skip whitespace around root-elements so empty input or trailing whitespace does not count as an unparsable tag.
      search(/\A\s+/)
      break if @eof and @buffer.empty?
      parse_tag
    end
  end

  private

  #Ensures at least 16kb of data is loaded into the buffer (or everything that is left, if less is available).
  def ensure_buffer
    while @buffer.length < 16384 and !@eof
      str = @io.gets(16384)
      if !str
        @eof = true
      else
        @buffer << str
      end
    end
  end

  #Searches for a given regex. If found the contents is removed from the buffer.
  #Returns the match-data or false if the regex did not match.
  def search(regex)
    ensure_buffer

    match = @buffer.match(regex)
    return false unless match

    #Remove only the text that was actually matched.
    @buffer = match.pre_match + match.post_match
    ensure_buffer
    return match
  end

  #Asumes a tag is the next to be parsed and adds it to document-data.
  #Returns the parsed element. If no tag is next, false is returned when ':false => true' is given - otherwise an error is raised.
  def parse_tag(args = {})
    match = search(/\A\s*<\s*(\/|)\s*(\S+?)(\s+|\/\s*>|>)/)

    unless match
      return false if args[:false]
      raise "Dont know what to do with buffer: '#{@buffer}'."
    end

    tag_name = match[2].to_s.strip.downcase
    start_sign = match[1].to_s.strip.downcase
    end_sign = match[3].to_s.strip.downcase

    #An end-tag that nobody was waiting for.
    raise "Dont know how to handle start-sign: '#{start_sign}' for tag: '#{tag_name}'." if !start_sign.empty?

    ele = Html_gen::Element.new(tag_name)

    if @eles_t.empty?
      puts "Adding element '#{tag_name}' to root elements." if @debug
      @eles << ele
    else
      puts "Adding element '#{tag_name}' to last t-element: '#{@eles_t.last.name}'." if @debug
      @eles_t.last.eles << ele
    end

    @eles_t << ele
    puts "New element-match: #{match.to_a}" if @debug

    if end_sign.match(/^\/\s*>$/)
      #Self-closing tag without attributes (<br/>).
      puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
      closed_ele = @eles_t.pop
      raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed_ele.name}'." if closed_ele.name.to_s != tag_name
    elsif end_sign.empty?
      #The tag-name was followed by whitespace, so attributes (and then content) follow.
      parse_attr_of_tag(ele, tag_name)

      #An empty style-attribute has nothing to convert and would make the conversion raise.
      has_style = ["style", :style].any? { |key| !ele.attr[key].to_s.strip.empty? }
      ele.convert_style_to_css if has_style
    else
      parse_content_of_tag(ele, tag_name)
    end

    return ele
  end

  #Parses the attributes of a tag into 'ele.attr' until the end of the tag is reached.
  #After a normal end ('>') the content of the tag is parsed as well. A self-closing end ('/>') just closes the element.
  def parse_attr_of_tag(ele, tag_name)
    loop do
      #The attribute-name may not contain '>', '<', '/', '=' or quotes. Otherwise the end of the tag and following text containing '=' would be taken for an attribute.
      if match = search(%r{\A\s*([^\s=<>/"']+)=("|'|)})
        attr_name = match[1]
        raise "Attribute already exists on element: '#{attr_name}'." if ele.attr.key?(attr_name)

        if match[2].to_s.empty?
          #Unquoted value: It runs until the next whitespace (or the end of the tag).
          quote_char = /\s+/
          quote_val = :whitespace
        else
          quote_char = /#{Regexp.escape(match[2])}/
          quote_val = :normal
        end

        attr_val = parse_attr_until_quote(quote_char, quote_val)

        puts "Parsed attribute '#{attr_name}' with value '#{attr_val}'." if @debug
        ele.attr[attr_name] = attr_val
      elsif search(%r{\A\s*/\s*>})
        #Self-closing tag with attributes (<br class="x" />): There is no content to parse.
        puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
        closed_ele = @eles_t.pop
        raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed_ele.name}'." if closed_ele.name.to_s != tag_name
        break
      elsif search(/\A\s*>/)
        parse_content_of_tag(ele, tag_name)
        break
      else
        raise "Dont know what to do with buffer when parsing attributes: '#{@buffer}'."
      end
    end
  end

  #Reads an attribute-value from the buffer until 'quote_char' is met and returns the value (without the terminating character).
  #'quote_val' is :normal for quoted values and :whitespace for unquoted values, which also end at '>'.
  def parse_attr_until_quote(quote_char, quote_val)
    val = ""

    loop do
      ensure_buffer
      char = @buffer.slice!(0)
      break if !char

      if char == "\\"
        #Keep the backslash and whatever it escapes. 'to_s' guards against a backslash as the very last character.
        val << char
        val << @buffer.slice!(0).to_s
      elsif char =~ quote_char
        break
      elsif char == ">" and quote_val == :whitespace
        #The end of the tag also ends an unquoted value - put it back for the attribute-parser to find.
        @buffer = char + @buffer
        break
      else
        val << char
      end
    end

    return val
  end

  #Assumes some content of a tag is next to be parsed and parses it.
  #Sub-elements and text are added to the element until the end-tag matching 'tag_name' is found.
  def parse_content_of_tag(ele, tag_name)
    raise "Empty tag-name given: '#{tag_name}'." if tag_name.to_s.strip.empty?
    raise "No 'ele' was given." if !ele

    loop do
      if search(/\A\s*\Z/)
        raise "Could not find end of tag: '#{tag_name}'."
      elsif match = search(/\A\s*<\s*\/\s*#{Regexp.escape(tag_name)}\s*>\s*/i)
        puts "Found end: '#{match.to_a}' for '#{@eles_t.last.name}'." if @debug
        closed_ele = @eles_t.pop
        raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed_ele.name}'." if closed_ele.name.to_s != tag_name

        break
      elsif new_ele = parse_tag(:false => true)
        #The new element has already been added to its parent by 'parse_tag'.
        puts "Found new element '#{new_ele.name}' and adding it to '#{ele.name}'." if @debug
      elsif match = search(/\A(.+?)(<|\Z)/m)
        #Multiline-match, so text spanning several lines is accepted as well.
        puts "Text-content-match: '#{match.to_a}'." if @debug

        #Put end back into buffer.
        @buffer = match[2] + @buffer
        puts "Buffer after text-match: #{@buffer}" if @debug

        #Add text element to list as finished.
        ele.eles << Html_gen::Text_ele.new(:str => match[1])
      else
        raise "Dont know what to do with buffer: '#{@buffer}'."
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
#Holds a piece of plain text.
class Html_gen::Text_ele
  #The arguments-hash that was given to the constructor.
  attr_reader :args

  #The constructor.
  #===Arguments
  # :str - The text that this element should hold.
  def initialize(args)
    #Keep the whole hash: both the 'args'-reader and 'str' read from it.
    @args = args
  end

  #Returns the text that this element holds.
  def str
    return @args[:str]
  end
end
@@ -0,0 +1,25 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe "Parser" do
  it "should be able generate elements from HTML" do
    #A complete document should give a single root element holding 'head' and 'body'.
    document = Html_gen::Parser.new(:str => "<html><head><title>Test</title></head><body>This is the body</body></html>")
    raise "Expected 1 root element but got: '#{document.eles.length}'." unless document.eles.length == 1

    html_ele = document.eles.first
    raise "Expected 2 elements of HTML element but got: '#{html_ele.eles.length}'. #{html_ele.eles_names}" unless html_ele.eles.length == 2

    title_ele = html_ele.eles.first.eles.first
    raise "Expected name to be 'title' but it wasnt: '#{title_ele.name}'." unless title_ele.name == "title"

    #Attributes in double quotes, single quotes and without quotes. The style should end up in the CSS-hash.
    cell_doc = Html_gen::Parser.new(:str => "<td colspan=\"2\" style=\"font-weight: bold;\" width='100px' height=50px>test</td>")
    cell = cell_doc.eles.first

    raise "Expected name of element to be 'td' but it wasnt: '#{cell.name}'." unless cell.name == "td"
    raise "Expected colspan to be '2' but it wasnt: '#{cell.attr["colspan"]}'." unless cell.attr["colspan"] == "2"
    raise "Expected width to be '100px' but it wasnt: '#{cell.attr["width"]}'." unless cell.attr["width"] == "100px"
    raise "Expected height to be '50px' but it wasnt: '#{cell.attr["height"]}'." unless cell.attr["height"] == "50px"
    raise "Expected CSS-font-weight to be 'bold' but it wasnt: '#{cell.css["font-weight"]}'." unless cell.css["font-weight"] == "bold"
    raise "Expected style to be empty but it wasnt: '#{cell.attr["style"]}'." unless cell.attr["style"].to_s.empty?
  end
end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: html_gen
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.1
5
+ version: 0.0.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - Kasper Johansen
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-08-10 00:00:00 +02:00
13
+ date: 2012-08-11 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -78,6 +78,9 @@ files:
78
78
  - html_gen.gemspec
79
79
  - lib/html_gen.rb
80
80
  - lib/html_gen_element.rb
81
+ - lib/html_gen_parser.rb
82
+ - lib/html_gen_text_ele.rb
83
+ - spec/html_gen_parser_spec.rb
81
84
  - spec/html_gen_spec.rb
82
85
  - spec/spec_helper.rb
83
86
  has_rdoc: true
@@ -94,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
97
  requirements:
95
98
  - - ">="
96
99
  - !ruby/object:Gem::Version
97
- hash: 3676011512365822361
100
+ hash: 4461972116095674458
98
101
  segments:
99
102
  - 0
100
103
  version: "0"