html_gen 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.3
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{html_gen}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Kasper Johansen"]
12
- s.date = %q{2012-08-10}
12
+ s.date = %q{2012-08-11}
13
13
  s.description = %q{A small framework for generating HTML.}
14
14
  s.email = %q{k@spernj.org}
15
15
  s.extra_rdoc_files = [
@@ -28,6 +28,9 @@ Gem::Specification.new do |s|
28
28
  "html_gen.gemspec",
29
29
  "lib/html_gen.rb",
30
30
  "lib/html_gen_element.rb",
31
+ "lib/html_gen_parser.rb",
32
+ "lib/html_gen_text_ele.rb",
33
+ "spec/html_gen_parser_spec.rb",
31
34
  "spec/html_gen_spec.rb",
32
35
  "spec/spec_helper.rb"
33
36
  ]
@@ -40,6 +40,12 @@ class Html_gen::Element
40
40
  # element.html #=> "<b>Te<i>s</i>t</b>"
41
41
  attr_accessor :str_html
42
42
 
43
+ #An array holding all the sub-elements of this element.
44
+ attr_accessor :eles
45
+
46
+ #The name of the element. "a" for <a> and such.
47
+ attr_accessor :name
48
+
43
49
  #You can give various arguments as shortcuts to calling the methods. You can also decide what should be used for newline and indentation.
44
50
  # Html_gen::Element.new(:b, {
45
51
  # :css => {"font-weight" => "bold"},
@@ -177,4 +183,39 @@ class Html_gen::Element
177
183
  #Returns the string.
178
184
  return str
179
185
  end
186
+
187
# Returns the names of all sub-elements in a new array.
#
# Children that do not respond to +name+ are skipped instead of raising
# NoMethodError. The parser appends Html_gen::Text_ele objects to +eles+,
# and those only expose +str+.
# NOTE(review): skipping (rather than returning nil placeholders) is a
# judgment call - confirm against callers that index into the result.
def eles_names
  @eles.select { |ele| ele.respond_to?(:name) }.map { |ele| ele.name }
end
196
+
197
# Converts the content of the 'style'-attribute to css-hash-content.
#
# Reads the style string from @attr[:style] or @attr["style"] (first non-blank
# one wins), moves every "key: value;" declaration into @css and consumes the
# style string in place, so the attribute ends up empty afterwards.
#
# Raises a RuntimeError if no style is set, if a key already exists in @css,
# or if the remaining style text cannot be parsed.
def convert_style_to_css
  if !@attr[:style].to_s.strip.empty?
    style = @attr[:style]
  elsif !@attr["style"].to_s.strip.empty?
    style = @attr["style"]
  else
    raise "No style set in element."
  end

  loop do
    if match = style.match(/\A\s*(\S+?):\s*(.+?)\s*(;|\Z)/)
      # Remove only the declaration that was just matched at the front.
      # Replacing every occurrence of the matched text would also eat
      # identical text inside later declarations (for example "color: red;"
      # inside "background-color: red;").
      style.slice!(0, match[0].length)
      key = match[1]
      val = match[2]
      raise "Such a key already exists in CSS-hash: '#{key}'." if @css.key?(key)
      @css[key] = val
    elsif style.slice!(/\A\s*\Z/)
      # Only whitespace (or nothing) is left: everything has been converted.
      break
    else
      raise "Dont know what to do with style-variable: '#{style}'."
    end
  end
end
180
221
  end
@@ -0,0 +1,185 @@
1
+ #A simple, lightweight and pure-Ruby class for parsing HTML-strings into elements.
2
+ #===Examples
3
+ # doc = Html_gen::Parser.new(:str => a_html_variable)
4
+ # html_ele = doc.eles.first
5
+ # html_ele.name #=> "html"
6
+ class Html_gen::Parser
7
+ #An array that holds all the parsed root-elements.
8
+ attr_reader :eles
9
+
10
# The constructor. See class documentation for usage of this.
#
# Accepted keys in +args+:
#   :io    - an object to read the HTML from (read through +gets+).
#   :str   - a string containing the HTML (wrapped in a StringIO).
#   :debug - when true, progress is printed with +puts+.
#
# Raises a RuntimeError if neither :io nor :str is given. The whole input is
# parsed immediately; the root elements are available through +eles+.
# NOTE(review): StringIO is assumed to be loaded elsewhere - confirm.
def initialize(args)
  if args[:io]
    @io = args[:io]
  elsif args[:str]
    @io = StringIO.new(args[:str])
  else
    raise "Dont know how to handle given arguments."
  end

  @eof = false
  @buffer = ""
  @eles = []
  @eles_t = []
  @debug = args[:debug]

  loop do
    # Skip whitespace before, between and after root elements. Without this a
    # trailing newline after a self-closing root tag (or an empty document)
    # reached parse_tag and raised.
    search(/\A\s+/)
    break if @buffer.empty?
    parse_tag
  end
end
31
+
32
+ private
33
+
34
# Tops up @buffer from @io until it holds at least 16384 characters or the
# input is exhausted. Sets @eof once +gets+ returns nil.
def ensure_buffer
  until @eof || @buffer.length >= 16384
    chunk = @io.gets(16384)

    if chunk
      @buffer << chunk
    else
      @eof = true
    end
  end
end
45
+
46
# Searches the buffer for the given regex.
#
# If found, the matched text is removed from the buffer, the buffer is topped
# up again and the MatchData is returned. Returns false when nothing matches.
def search(regex)
  ensure_buffer

  match = @buffer.match(regex)
  return false unless match

  # Remove only the occurrence that is returned to the caller. Removing every
  # occurrence would silently drop later, not yet parsed, text.
  @buffer.sub!(regex, "")
  ensure_buffer
  match
end
58
+
59
# Assumes a tag is the next thing in the buffer, creates an element for it and
# adds it to the document data (as a root element, or as a child of the
# element currently being parsed).
#
# Returns the new element. When no tag is found it returns false if
# args[:false] is set, and raises otherwise. Closing tags are not handled
# here and raise.
def parse_tag(args = {})
  match = search(/\A\s*<\s*(\/|)\s*(\S+?)(\s+|\/\s*>|>)/)

  unless match
    return false if args[:false]
    raise "Dont know what to do with buffer: '#{@buffer}'."
  end

  tag_name = match[2].to_s.strip.downcase
  start_sign = match[1].to_s.strip.downcase
  end_sign = match[3].to_s.strip.downcase

  raise "Dont know how to handle start-sign: '#{start_sign}' for tag: '#{tag_name}'." if !start_sign.empty?

  ele = Html_gen::Element.new(tag_name)
  parent = @eles_t.last

  if parent
    puts "Adding element '#{tag_name}' to last t-element: '#{parent.name}'." if @debug
    parent.eles << ele
  else
    puts "Adding element '#{tag_name}' to root elements." if @debug
    @eles << ele
  end

  # Track the element as "open" until its end has been seen.
  @eles_t << ele
  puts "New element-match: #{match.to_a}" if @debug

  # Self-closing tag ("<br/>"): close it right away.
  if end_sign =~ /^\/\s*>$/
    puts "End of element '#{tag_name}' for '#{@eles_t.last.name}'." if @debug
    closed = @eles_t.pop
    raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed.name}'." if closed.name.to_s != tag_name
    return closed
  end

  if end_sign.to_s.strip.empty?
    # The tag name was followed by whitespace, so attributes come next.
    parse_attr_of_tag(ele, tag_name)
    ele.convert_style_to_css if ele.attr.key?("style") || ele.attr.key?(:style)
  else
    # The tag was closed with ">", so content comes next.
    parse_content_of_tag(ele, tag_name)
  end

  ele
end
102
+
103
# Parses the attributes of an opened tag into ele.attr and then continues with
# the content of the tag once the closing ">" is reached.
#
# A tag that ends with "/>" after its attributes is treated as self-closing:
# it is removed from the list of open elements and no content is parsed.
#
# Raises a RuntimeError on duplicate attributes or unparsable input.
def parse_attr_of_tag(ele, tag_name)
  loop do
    # The name stops at the first "=" and can never contain ">". A greedy
    # \S+ would split 'href="x?a=1"' at the last "=" and could swallow the
    # end of the tag.
    if match = search(/\A\s*([^\s=>]+)=(\"|'|)/)
      attr_name = match[1]
      raise "Attribute already exists on element: '#{attr_name}'." if ele.attr.key?(attr_name)

      if match[2].to_s.empty?
        # Unquoted value: runs until whitespace (or the end of the tag).
        quote_char = /\s+/
        quote_val = :whitespace
      else
        quote_char = /#{Regexp.escape(match[2])}/
        quote_val = :normal
      end

      attr_val = parse_attr_until_quote(quote_char, quote_val)

      puts "Parsed attribute '#{attr_name}' with value '#{attr_val}'." if @debug
      ele.attr[attr_name] = attr_val
    elsif search(/\A\s*\/\s*>/)
      # Self-closing tag with attributes, e.g. <img src="a.png" />.
      closed = @eles_t.pop
      raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{closed.name}'." if closed.name.to_s != tag_name
      break
    elsif search(/\A\s*>/)
      parse_content_of_tag(ele, tag_name)
      break
    else
      raise "Dont know what to do with buffer when parsing attributes: '#{@buffer}'."
    end
  end
end
129
+
130
# Reads an attribute value from the buffer, one character at a time, until
# the terminator is found, and returns the value.
#
# quote_char - a regex matching the terminating character (consumed).
# quote_val  - :whitespace for unquoted values (a ">" then also ends the
#              value and is left in the buffer), anything else for quoted.
#
# A backslash keeps itself and the following character verbatim, so an
# escaped quote does not end the value. Reading stops silently at end of
# input.
def parse_attr_until_quote(quote_char, quote_val)
  val = ""

  loop do
    ensure_buffer
    char = @buffer.slice!(0)
    break if !char

    if char == "\\"
      val << char

      # Refill before reading the escaped character, and stop if the input
      # ended right after the backslash (appending nil would raise).
      ensure_buffer
      escaped = @buffer.slice!(0)
      break if !escaped
      val << escaped
    elsif char =~ quote_char
      break
    elsif char == ">" and quote_val == :whitespace
      # Put the end of the tag back so the caller can see it.
      @buffer = char + @buffer
      break
    else
      val << char
    end
  end

  return val
end
153
+
154
# Assumes the content of a tag is next to be parsed and parses it until the
# matching end-tag is found.
#
# Nested tags are handed to parse_tag (which attaches them to the currently
# open element itself); anything else up to the next "<" is added to
# ele.eles as a Html_gen::Text_ele.
#
# Raises a RuntimeError if tag_name is blank, ele is missing, the end-tag is
# never found, or the buffer cannot be interpreted.
def parse_content_of_tag(ele, tag_name)
  raise "Empty tag-name given: '#{tag_name}'." if tag_name.to_s.strip.empty?
  raise "No 'ele' was given." if !ele

  loop do
    if search(/\A\s*\Z/)
      raise "Could not find end of tag: '#{tag_name}'."
    elsif match = search(/\A\s*<\s*\/\s*#{Regexp.escape(tag_name)}\s*>\s*/i)
      puts "Found end: '#{match.to_a}' for '#{@eles_t.last.name}'." if @debug
      ele = @eles_t.pop
      raise "Expected ele-name to be: '#{tag_name}' but it wasnt: '#{ele.name}'." if ele.name.to_s != tag_name

      break
    elsif new_ele = parse_tag(:false => true)
      # parse_tag has already added the new element to the open element.
      puts "Found new element '#{new_ele.name}' and adding it to '#{ele.name}'." if @debug
    elsif match = search(/\A(.+?)(<|\Z)/m)
      # Multiline mode lets "." match newlines, so text that contains or
      # starts with a line break is accepted instead of raising.
      puts "Text-content-match: '#{match.to_a}'." if @debug

      # Put end back into buffer.
      @buffer = match[2] + @buffer
      puts "Buffer after text-match: #{@buffer}" if @debug

      # Add text element to list as finished.
      ele.eles << Html_gen::Text_ele.new(:str => match[1])
    else
      raise "Dont know what to do with buffer: '#{@buffer}'."
    end
  end
end
185
+ end
@@ -0,0 +1,12 @@
1
# A piece of plain text inside an element. The parser adds these to the
# 'eles'-array of the element that contains the text.
class Html_gen::Text_ele
  # The arguments-hash that was given to the constructor.
  attr_reader :args

  # Returns the text that this element holds.
  attr_reader :str

  # args - a hash where :str is the text of the element.
  def initialize(args)
    # Both are stored: 'args' previously always returned nil and 'str' raised,
    # because @args was never assigned.
    @args = args
    @str = args[:str]
  end
end
@@ -0,0 +1,25 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe "Parser" do
  it "should be able generate elements from HTML" do
    # A nested document: one root with a head and a body.
    parser = Html_gen::Parser.new(:str => "<html><head><title>Test</title></head><body>This is the body</body></html>")
    raise "Expected 1 root element but got: '#{parser.eles.length}'." unless parser.eles.length == 1

    html = parser.eles.first
    raise "Expected 2 elements of HTML element but got: '#{html.eles.length}'. #{html.eles_names}" unless html.eles.length == 2

    title = html.eles.first.eles.first
    raise "Expected name to be 'title' but it wasnt: '#{title.name}'." unless title.name == "title"

    # A single tag using double-quoted, single-quoted and unquoted attributes.
    doc = Html_gen::Parser.new(:str => "<td colspan=\"2\" style=\"font-weight: bold;\" width='100px' height=50px>test</td>")
    td = doc.eles.first

    raise "Expected name of element to be 'td' but it wasnt: '#{td.name}'." unless td.name == "td"
    raise "Expected colspan to be '2' but it wasnt: '#{td.attr["colspan"]}'." unless td.attr["colspan"] == "2"
    raise "Expected width to be '100px' but it wasnt: '#{td.attr["width"]}'." unless td.attr["width"] == "100px"
    raise "Expected height to be '50px' but it wasnt: '#{td.attr["height"]}'." unless td.attr["height"] == "50px"

    # The style attribute is expected to have been moved into the CSS hash.
    raise "Expected CSS-font-weight to be 'bold' but it wasnt: '#{td.css["font-weight"]}'." unless td.css["font-weight"] == "bold"
    raise "Expected style to be empty but it wasnt: '#{td.attr["style"]}'." unless td.attr["style"].to_s.empty?
  end
end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: html_gen
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.1
5
+ version: 0.0.3
6
6
  platform: ruby
7
7
  authors:
8
8
  - Kasper Johansen
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-08-10 00:00:00 +02:00
13
+ date: 2012-08-11 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -78,6 +78,9 @@ files:
78
78
  - html_gen.gemspec
79
79
  - lib/html_gen.rb
80
80
  - lib/html_gen_element.rb
81
+ - lib/html_gen_parser.rb
82
+ - lib/html_gen_text_ele.rb
83
+ - spec/html_gen_parser_spec.rb
81
84
  - spec/html_gen_spec.rb
82
85
  - spec/spec_helper.rb
83
86
  has_rdoc: true
@@ -94,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
97
  requirements:
95
98
  - - ">="
96
99
  - !ruby/object:Gem::Version
97
- hash: 3676011512365822361
100
+ hash: 4461972116095674458
98
101
  segments:
99
102
  - 0
100
103
  version: "0"