hashtml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .idea
6
+ coverage
7
+ InstalledFiles
8
+ lib/bundler/man
9
+ pkg
10
+ rdoc
11
+ spec/reports
12
+ test/tmp
13
+ test/version_tmp
14
+ tmp
15
+
16
+ # YARD artifacts
17
+ .yardoc
18
+ _yardoc
19
+ doc/
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,4 @@
1
+ hashtml
2
+ =======
3
+
4
+ HashTML is a gem for parsing HTML documents into Ruby Hash-like objects.
data/hashtml.gemspec ADDED
@@ -0,0 +1,17 @@
1
# Gem specification for hashtml.
#
# The version constant is loaded straight from the source tree (relative to
# this file) so the spec can be evaluated without the gem being installed.
version_file = File.join(File.dirname(__FILE__), 'lib', 'hashtml', 'version')
require version_file

Gem::Specification.new do |spec|
  # Identity
  spec.name    = "hashtml"
  spec.version = HashTML::VERSION
  spec.license = "MIT"

  # Authorship and project links
  spec.authors  = ["Mauro Rodrigues"]
  spec.email    = ["maurorodrigues15@gmail.com"]
  spec.homepage = 'https://github.com/MRod15/hashtml'

  # Human-readable descriptions
  spec.summary     = "A HTML to Hash to HTML helper."
  spec.description = "HashTML is a gem for parsing HTML documents to Ruby Hash-like objects."

  # Packaged files are whatever git tracks, one path per output line, so the
  # spec has to be evaluated inside a git checkout.
  tracked_files = `git ls-files`
  spec.files = tracked_files.split("\n")
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'nokogiri', '~> 1.5.5'
end
data/lib/hashtml.rb ADDED
@@ -0,0 +1,242 @@
1
+ require 'nokogiri'
2
+ require File.join(File.dirname(__FILE__), 'hashtml', 'hash.rb')
3
+ #
4
+ # HashTML translates between HTML documents and Ruby Hash-like objects.
5
+ # This work is loosely inspired by the work done by CobraVsMongoose.
6
+ # (see http://cobravsmongoose.rubyforge.org/)
7
+ #
8
class HashTML

  # @return [HashTML::Node] tree built from the parsed document
  attr_reader :root_node

  # Parses +html+ and stores the resulting tree in {#root_node}.
  #
  # A Nokogiri::HTML::Document is used as is; any other object is converted
  # with +to_s+ and handed to Nokogiri::HTML.
  #
  #@param html [Nokogiri::HTML::Document, #to_s] document to parse
  #@example
  #  html = '<span id="row_29" class="step param">true</span>'
  #  HashTML.new(html).to_h
  #  # An element such as the span above is represented as:
  #  # {
  #  #   "span" => {
  #  #     :children   => [ { :text => "true" } ],
  #  #     :attributes => { "id" => "row_29", "class" => "step param" }
  #  #   }
  #  # }
  #  # NOTE(review): the root node wraps the whole parsed document, so the
  #  # span is presumably nested under wrapper nodes added by the parser -
  #  # confirm against the Nokogiri version in use.
  #
  def initialize(html)
    doc = html.is_a?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html.to_s)
    @root_node = HashTML::Node.new(doc)
  end

  # Serializes the parsed tree back to an HTML string.
  #@return [String]
  def to_html
    @root_node.to_html
  end

  # Returns the parsed tree as nested Hashes (see HashTML::Node#to_h).
  #@return [Hash]
  def to_h
    @root_node.to_h
  end

  # Dynamic lookup of the root node by name.
  #
  # * +doc.name(attributes)+  returns the root node when its name is +name+
  #   and it carries every pair in +attributes+, otherwise nil.
  # * +doc.name?(attributes)+ returns true/false for the same check.
  #
  #@param method [Symbol] name of the method that was called
  #@param args [Array] optional first element: Hash of attribute pairs to match
  #@return [HashTML::Node, nil, Boolean]
  def method_missing(method, *args)
    name = method.to_s
    attributes = args.first || {}
    if name.end_with?("?")
      _check_for_presence(name[0..-2], attributes)
    else
      _get_value(name, attributes)
    end
  end

  private

  # True when {#_get_value} finds a match.
  def _check_for_presence(key, attributes={})
    !!_get_value(key, attributes)
  end

  # Returns the root node when it is named +key+ and includes +attributes+.
  def _get_value(key, attributes={})
    return nil unless @root_node.name == key
    return @root_node unless attributes
    root_attributes = @root_node.attributes
    (root_attributes && root_attributes.include_pairs?(attributes)) ? @root_node : nil
  end

  class InvalidAttributeValuePairError < StandardError
  end

  public

  class << self
    # Builds a HashTML::Node (or HashTML::Text) tree from a Hash.
    #
    #@param hash [Hash] either <tt>{ :text => value }</tt> or
    #  <tt>{ name => { :children => [...], :attributes => {...} } }</tt>;
    #  both inner keys are optional
    #@return [HashTML::Node, HashTML::Text, nil] nil for an empty Hash
    def to_hashtml(hash)
      convert_to_hashtml(hash)
    end

    # Returns an HTML string corresponding to the data structure of the given Hash.
    #@param hash [Hash] same shape as accepted by {to_hashtml}
    #@return [String]
    #@example
    #  hash = { "span" =>
    #           {
    #             :children   => [ { :text => "true" } ],
    #             :attributes => { "id" => "row_29", "class" => "step param" }
    #           }
    #         }
    #  HashTML.to_html(hash)
    #  # => "<span id=\"row_29\" class=\"step param\">true</span>"
    #
    def to_html(hash)
      convert_to_hashtml(hash).to_html
    end

    private

    # Recursive worker behind {to_hashtml}. A +:text+ key short-circuits to a
    # Text node; for any other key a Node named after the key is built. When
    # the Hash holds several pairs the last one wins.
    def convert_to_hashtml(hash)
      hashtml = nil
      hash.each do |key, value|
        return HashTML::Text.new(value) if key == :text
        # A nil value or missing :children / :attributes means "none" rather
        # than an error.
        value ||= {}
        hashtml = HashTML::Node.new
        hashtml.name = key
        hashtml.attributes = value[:attributes] || {}
        hashtml.children = (value[:children] || []).map { |child| convert_to_hashtml(child) }
      end
      hashtml
    end
  end

  # An element of the tree: a name, a Hash of attributes and a list of
  # children (Node or Text instances).
  class Node
    attr_accessor :name, :attributes, :children

    # Builds a node from a Nokogiri node, or an empty node when +node+ is nil.
    #
    #@param node [#name, #children, nil] source node; its attributes are read
    #  only when it responds to +attributes+
    def initialize(node=nil)
      # Start from empty collections so a bare node can be serialized.
      @attributes = {}
      @children = []
      return unless node
      @name = node.name
      @attributes = get_html_node_attributes(node) if node.respond_to?(:attributes)
      @children = get_html_node_children(node)
    end

    # Returns <tt>{ name => { children: [...], attributes: {...} } }</tt>
    # with every child converted through its own +to_h+.
    #@return [Hash]
    def to_h
      { @name => { children: @children.map { |child| child.to_h }, attributes: @attributes } }
    end

    # Serializes the node and its children as
    # <tt><name k="v" ...>children</name></tt>.
    #
    # NOTE(review): names, attribute values and child text are interpolated
    # verbatim (no escaping) - confirm inputs are trusted or already escaped.
    #@return [String]
    def to_html
      attribute_list = @attributes.map { |k, v| " #{k}=\"#{v}\"" }.join
      children_html = @children.map { |child| child.to_html }.join
      "<#{@name}#{attribute_list}>#{children_html}</#{@name}>"
    end

    # Dynamic access to children.
    #
    # * +node.name(attributes)+  first child element named +name+ that has
    #   every pair in +attributes+, or nil.
    # * +node.name?(attributes)+ true/false for the same lookup.
    # * +node.name = value+      replaces every matching child element with
    #   +value+ (with two arguments the first is the attribute filter).
    # * +node.text+ / +node.text?+ / +node.text = value+ read, test for and
    #   replace the text of the direct Text children.
    #
    #@param method [Symbol] name of the method that was called
    #@param args [Array] +[attributes]+, +[new_value]+ or +[attributes, new_value]+
    def method_missing(method, *args)
      name = method.to_s
      attributes, new_value = args
      attributes ||= {}
      if name.end_with?("?")
        _check_for_presence(name[0..-2], attributes)
      elsif name.end_with?("=")
        # With a single argument (plain assignment) that argument is the new
        # value, not an attribute filter.
        new_value, attributes = attributes, {} if new_value.nil?
        _change_value(name[0..-2], attributes, new_value)
      else
        _get_value(name, attributes)
      end
    end

    private

    # True when a matching child exists; for 'text', true only when the node
    # actually has non-empty text (an empty String is truthy in Ruby, so it
    # must be tested explicitly).
    def _check_for_presence(key, attributes={})
      value = _get_value(key, attributes)
      key == 'text' ? !value.empty? : !!value
    end

    # Returns the concatenated text of the direct Text children for 'text',
    # otherwise the first child element named +key+ including +attributes+.
    def _get_value(key, attributes={})
      if key == 'text'
        texts = @children.select { |child| child.is_a?(HashTML::Text) }
        return texts.map { |child| child.text }.join
      end
      @children.find do |child|
        !child.is_a?(HashTML::Text) && child.name == key && child.attributes.include_pairs?(attributes)
      end
    end

    # Replaces the node text (key 'text') or every matching child element.
    # For 'text' all existing Text children are dropped and a single new Text
    # is appended after the remaining element children.
    def _change_value(key, attributes, new_value)
      if key == 'text'
        elements = @children.reject { |child| child.is_a?(HashTML::Text) }
        # Concatenate so that @children stays a flat list.
        @children = elements + [HashTML::Text.new(new_value)]
      else
        @children.each_with_index do |child, index|
          next if child.is_a?(HashTML::Text)
          if child.name == key && child.attributes.include_pairs?(attributes)
            @children[index] = new_value
          end
        end
      end
    end

    # Converts the children of a Nokogiri node: text and CDATA become Text,
    # elements become Node, every other kind of child is dropped.
    def get_html_node_children(node)
      node.children.map do |child|
        case child
        when Nokogiri::XML::Text, Nokogiri::XML::CDATA
          HashTML::Text.new(child.to_s)
        when Nokogiri::XML::Element
          HashTML::Node.new(child)
        end
      end.compact
    end

    # Maps attribute names to their plain values.
    def get_html_node_attributes(node)
      Hash[node.attributes.map { |name, value| [name, value.value] }]
    end

  end

  # A run of text inside a Node.
  class Text
    attr_accessor :text

    #@param text [Object] content, emitted verbatim by {#to_html}
    def initialize(text)
      @text = text
    end

    #@return [Hash] <tt>{ text: content }</tt>
    def to_h
      { text: @text }
    end

    #@return [Object] the stored content, unchanged
    def to_html
      @text
    end
  end

  class ParseError < RuntimeError
  end

end
@@ -0,0 +1,10 @@
1
+
2
class Hash

  # Checks whether every key/value pair of +hash+ is also present in the
  # receiver, i.e. whether +hash+ is a subset of it.
  #
  # Values are read with <tt>self[key]</tt>, so a key that is absent from the
  # receiver reads as nil (or the receiver's default) and therefore matches a
  # pair whose value is that same object.
  #
  #@param hash [Hash] pairs to verify
  #@return [Boolean] true when all pairs match (always true for an empty +hash+)
  def include_pairs?(hash)
    # Compare every pair explicitly; the key itself must never decide the
    # outcome, otherwise nil/false keys would always count as matching.
    hash.all? { |key, value| self[key] == value }
  end
end
@@ -0,0 +1,3 @@
1
# HashTML is declared as a class in lib/hashtml.rb, so it has to be reopened
# as a class here as well: declaring it as a module raises a TypeError as
# soon as both files are loaded into the same process.
class HashTML
  # Version of the gem, read by the gemspec.
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hashtml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mauro Rodrigues
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.5
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.5
30
+ description: HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.
31
+ email:
32
+ - maurorodrigues15@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - LICENSE
39
+ - README.md
40
+ - hashtml.gemspec
41
+ - lib/hashtml.rb
42
+ - lib/hashtml/hash.rb
43
+ - lib/hashtml/version.rb
44
+ homepage: https://github.com/MRod15/hashtml
45
+ licenses:
46
+ - MIT
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.23
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: A HTML to Hash to HTML helper.
69
+ test_files: []
70
+ has_rdoc: