hashtml 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .idea
6
+ coverage
7
+ InstalledFiles
8
+ lib/bundler/man
9
+ pkg
10
+ rdoc
11
+ spec/reports
12
+ test/tmp
13
+ test/version_tmp
14
+ tmp
15
+
16
+ # YARD artifacts
17
+ .yardoc
18
+ _yardoc
19
+ doc/
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,4 @@
1
+ hashtml
2
+ =======
3
+
4
+ HashTML is a gem for parsing HTML documents to Ruby Hash-like objects
data/hashtml.gemspec ADDED
@@ -0,0 +1,17 @@
1
# Gem specification for hashtml.
# The version number is read from lib/hashtml/version, loaded relative to this file.
require File.join(File.dirname(__FILE__), 'lib', 'hashtml', 'version')

Gem::Specification.new do |spec|
  # Identity
  spec.name          = 'hashtml'
  spec.version       = HashTML::VERSION
  spec.license       = 'MIT'

  # Authorship and description
  spec.authors       = ['Mauro Rodrigues']
  spec.email         = ['maurorodrigues15@gmail.com']
  spec.summary       = 'A HTML to Hash to HTML helper.'
  spec.description   = 'HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.'
  spec.homepage      = 'https://github.com/MRod15/hashtml'

  # Packaged files: everything tracked by git, one path per output line.
  tracked_files      = `git ls-files`
  spec.files         = tracked_files.split("\n")
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'nokogiri', '~> 1.5.5'
end
data/lib/hashtml.rb ADDED
@@ -0,0 +1,242 @@
1
+ require 'nokogiri'
2
+ require File.join(File.dirname(__FILE__), 'hashtml', 'hash.rb')
3
+ #
4
+ # HashTML translates between HTML documents and Ruby Hash-like objects.
5
+ # This work is loosely inspired by the work done by CobraVsMongoose.
6
+ # (see http://cobravsmongoose.rubyforge.org/)
7
+ #
8
class HashTML

  # Error class declared for invalid attribute/value pairs; nothing in this class raises it.
  class InvalidAttributeValuePairError < StandardError
  end

  # Error class declared for parse failures; nothing in this class raises it.
  class ParseError < RuntimeError
  end

  # The HashTML::Node wrapping the parsed document.
  attr_reader :root_node

  # Parses the given HTML into a tree of HashTML::Node / HashTML::Text objects.
  # A Nokogiri::HTML::Document is used as-is; anything else is converted with
  # to_s and parsed by Nokogiri::HTML.
  #@param html [Nokogiri::HTML::Document, #to_s] document to parse
  #@example
  #  html = '<span id="row_29" class="step param">true</span>'
  #  HashTML.new(html).to_h
  #  # every element is rendered as
  #  #   { name => { :children => [...], :attributes => { ... } } }
  #  # and every text node as { :text => "..." }; the outermost entry is keyed
  #  # by the name of the parsed document node.
  def initialize(html)
    doc = html.is_a?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html.to_s)
    @root_node = HashTML::Node.new(doc)
  end

  # Returns the HTML string produced by the root node.
  #@return [String]
  def to_html
    @root_node.to_html
  end

  # Returns the Hash representation produced by the root node.
  #@return [Hash]
  def to_h
    @root_node.to_h
  end

  # Dynamic lookup on the root node:
  #   hashtml.foo(attrs)  returns the root node when it is named "foo" and its
  #                       attributes include the pairs in attrs, nil otherwise
  #   hashtml.foo?(attrs) returns the same check as true/false
  # Only the first argument is used; it defaults to an empty Hash.
  def method_missing(method, *args)
    method = method.to_s
    attributes = args.first || {}
    if method.end_with?('?')
      _check_for_presence(method[0..-2], attributes)
    else
      _get_value(method, attributes)
    end
  end

  class << self
    # Builds a HashTML::Node (or HashTML::Text) tree from a Hash shaped like
    # the output of #to_h.
    #@param hash [Hash]
    #@return [HashTML::Node, HashTML::Text, nil] nil when the hash is empty
    def to_hashtml(hash)
      convert_to_hashtml(hash)
    end

    # Returns an HTML string corresponding to the data structure of the given Hash.
    #@param hash [Hash]
    #@return [String]
    #@example
    #  hash = { "span" =>
    #             {
    #               :children => [ { :text => "true" } ],
    #               :attributes => { "id" => "row_29", "class" => "step param" }
    #             }
    #         }
    #  HashTML.to_html(hash)
    #  # => '<span id="row_29" class="step param">true</span>'
    def to_html(hash)
      convert_to_hashtml(hash).to_html
    end

    private

    # Recursive worker behind to_hashtml / to_html.
    # A :text key short-circuits to a HashTML::Text; any other key becomes a
    # Node named after the key. When the hash has several keys, the node built
    # for the last one is returned.
    def convert_to_hashtml(hash)
      hashtml = nil
      hash.each do |key, value|
        return HashTML::Text.new(value) if key == :text
        hashtml = HashTML::Node.new
        hashtml.name = key
        # Both entries are optional: a missing one means "none".
        hashtml.attributes = value[:attributes] || {}
        hashtml.children = (value[:children] || []).map { |child| convert_to_hashtml(child) }
      end
      hashtml
    end
  end

  # An element of the tree: a name, a Hash of attributes and an Array of
  # children (HashTML::Node and HashTML::Text objects).
  class Node
    attr_accessor :name, :attributes, :children

    # With no argument, builds an empty node (nil name, no attributes, no
    # children) to be filled in through the accessors. Otherwise copies the
    # name, attributes and children of the given Nokogiri node.
    #@param node [Nokogiri::XML::Node, nil]
    def initialize(node=nil)
      # Start from empty collections so to_h / to_html work on a bare node.
      @attributes = {}
      @children = []
      return unless node
      @name = node.name
      # Objects that do not respond to :attributes keep the empty Hash.
      @attributes = get_html_node_attributes(node) if node.respond_to?(:attributes)
      @children = get_html_node_children(node)
    end

    # Returns { name => { :children => [...], :attributes => {...} } }, with
    # each child converted through its own to_h.
    #@return [Hash]
    def to_h
      { @name => { children: @children.map { |child| child.to_h }, attributes: @attributes } }
    end

    # Renders the node as "<name attr="value" ...>children</name>".
    # Double quotes inside attribute values are written as &quot; so a value
    # cannot terminate its attribute early; nothing else is escaped here.
    #@return [String]
    def to_html
      attribute_list = @attributes.map do |attr_name, attr_value|
        " #{attr_name}=\"#{attr_value.to_s.gsub('"', '&quot;')}\""
      end.join
      children_html = @children.map { |child| child.to_html }.join
      "<#{@name}#{attribute_list}>#{children_html}</#{@name}>"
    end

    # Dynamic access to children:
    #   node.foo(attrs)          first child element named "foo" whose attributes
    #                            include attrs, or nil
    #   node.foo?(attrs)         the same lookup as true/false
    #   node.foo = value         replaces matching children with value
    #   node.text / node.text =  reads / replaces the node's text children
    # A setter invoked with (attrs, value) restricts the match to attrs; with a
    # single argument that argument is the new value.
    def method_missing(method, *args)
      method = method.to_s
      attributes, new_value = args
      attributes ||= {}
      if method.end_with?('?')
        _check_for_presence(method[0..-2], attributes)
      elsif method.end_with?('=')
        # Single-argument form: the lone argument is the value, not a filter.
        new_value, attributes = attributes, {} if new_value.nil?
        _change_value(method[0..-2], attributes, new_value)
      else
        _get_value(method, attributes)
      end
    end

    private

    # True when _get_value finds something for key/attributes.
    def _check_for_presence(key, attributes={})
      !!_get_value(key, attributes)
    end

    # For key 'text' returns the concatenated text of all Text children;
    # otherwise returns the first child element with that name whose
    # attributes include the given pairs, or nil.
    def _get_value(key, attributes={})
      if key == 'text'
        return @children.select { |child| child.is_a?(HashTML::Text) }.map { |child| child.text }.join
      end
      @children.each do |child|
        next if child.is_a?(HashTML::Text)
        return child if child.name == key && child.attributes.include_pairs?(attributes)
      end
      nil
    end

    # For key 'text' drops every Text child and appends one Text holding
    # new_value after the remaining children; otherwise replaces each child
    # element matching key/attributes with new_value.
    def _change_value(key, attributes, new_value)
      if key == 'text'
        element_children = @children.reject { |child| child.is_a?(HashTML::Text) }
        # Concatenate so @children stays a flat list of nodes.
        @children = element_children + [HashTML::Text.new(new_value)]
      else
        @children.each_with_index do |child, index|
          next if child.is_a?(HashTML::Text)
          if child.name == key && child.attributes.include_pairs?(attributes)
            @children[index] = new_value
          end
        end
      end
    end

    # Converts the Nokogiri children of node: text and CDATA nodes become
    # HashTML::Text (built from child.to_s), elements become HashTML::Node,
    # every other kind of child is dropped.
    def get_html_node_children(node)
      node.children.map do |child|
        case child
        when Nokogiri::XML::Text, Nokogiri::XML::CDATA
          HashTML::Text.new(child.to_s)
        when Nokogiri::XML::Element
          HashTML::Node.new(child)
        end
      end.compact
    end

    # Maps each attribute name of node to the result of calling #value on
    # the corresponding attribute object.
    def get_html_node_attributes(node)
      Hash[node.attributes.map { |name, value| [name, value.value] }]
    end

  end

  # A run of text inside a Node.
  class Text
    attr_accessor :text

    #@param text [String] the text content
    def initialize(text)
      @text = text
    end

    # Returns { :text => text }.
    #@return [Hash]
    def to_h
      { text: @text }
    end

    # Returns the stored text unchanged.
    def to_html
      @text
    end
  end

  private

  # True when _get_value finds the root node for key/attributes.
  def _check_for_presence(key, attributes={})
    !!_get_value(key, attributes)
  end

  # Returns the root node when its name equals key and its attributes include
  # the given pairs; nil otherwise. A nil attributes argument skips the
  # attribute check.
  def _get_value(key, attributes={})
    return nil unless @root_node.name == key
    return @root_node unless attributes
    root_attributes = @root_node.attributes
    (root_attributes && root_attributes.include_pairs?(attributes)) ? @root_node : nil
  end

end
@@ -0,0 +1,10 @@
1
+
2
class Hash

  # Checks whether the given hash is a subset of this Hash: every key of
  # +hash+ must map to an equal value in the receiver (looked up with #[]).
  # An empty +hash+ is always a subset.
  #@param hash [Hash] pairs to verify
  #@return [Boolean]
  def include_pairs?(hash)
    # Compare values directly; the result must not depend on the key's truthiness.
    hash.all? { |key, value| self[key] == value }
  end
end
@@ -0,0 +1,3 @@
1
# HashTML is declared as a class in lib/hashtml.rb, so it is opened as a
# class here too; opening it as a module would make whichever of the two
# files loads second raise a TypeError.
class HashTML
  # Version of the hashtml gem.
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hashtml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mauro Rodrigues
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.5
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.5
30
+ description: HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.
31
+ email:
32
+ - maurorodrigues15@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - LICENSE
39
+ - README.md
40
+ - hashtml.gemspec
41
+ - lib/hashtml.rb
42
+ - lib/hashtml/hash.rb
43
+ - lib/hashtml/version.rb
44
+ homepage: https://github.com/MRod15/hashtml
45
+ licenses:
46
+ - MIT
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.23
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: A HTML to Hash to HTML helper.
69
+ test_files: []
70
+ has_rdoc: