hashtml 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTI0MTEyNDE3MGY3NDg1NWFkMjkyNDdkNjU0YTFiM2MxNTRjN2FmNg==
5
+ data.tar.gz: !binary |-
6
+ YTBmYzVjMjVkOGQ1YWVlMGNmNmFkZDE4NTYzYjIwODk4NTFiYWI2Yg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MWY5NjhjZDE5MTc5MzcyM2E2NzFhM2ZhMGFmNDc0NDgyMjRkZDY3NTAxM2Iy
10
+ NzNkNWRlOTg0Njg0N2YxZmQyY2RmMWE0YmQzYTcyZTVmMzZhNWU5Yjc1YjEw
11
+ MTA3ZTFhODMxZmVmNDE1NDlmM2ZhNzE4ZjAwMzc3NGE1ZTg4YzQ=
12
+ data.tar.gz: !binary |-
13
+ NjI1YTY0ZjNkMDY3YmUzNTNmZWRjYWQ1YTFkNjg4YThjZjA0N2E0MDNhYjUy
14
+ ZGFlZDNjOGEwYTcwMTg2YjE4ZmFkODA5Y2UxYjAyYWI5NjIzOTg2M2VjYjgw
15
+ NjA1NDI2YzRiYTJjNTUxYmY2ZTI1Y2M2MGJiMWVjNmVmZWIzNGU=
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - 2.1.0
6
+ - 2.2.0
7
+ - jruby-19mode
8
+ - ruby-head
9
+ - jruby-head
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'http://rubygems.org'
2
+ gem 'nokogiri'
3
+ gem 'rake'
4
+ gem 'yard'
5
+ gem 'kramdown'
data/README.md CHANGED
@@ -1,4 +1,117 @@
1
- hashtml
1
+ hashtml [ ![Codeship Status for MRod15/hashtml](https://codeship.io/projects/6dd49080-19a4-0132-515f-0a39251edeca/status)](https://codeship.io/projects/34440) [![Build Status](https://travis-ci.org/MRod15/hashtml.svg?branch=master)](https://travis-ci.org/MRod15/hashtml)
2
2
  =======
3
3
 
4
4
  HashTML is a gem for parsing HTML documents to Ruby Hash-like objects
5
+
6
+ ## Installation
7
+
8
+ HashTML is available as a RubyGem:
9
+
10
+ gem install hashtml
11
+
12
+ ## Usage
13
+
14
+ HashTML parses a Nokogiri::HTML::Document or anything that responds
15
+ to to_s with a string of valid HTML.
16
+ A HashTML object corresponding to the data structure of the given HTML
17
+ is generated.
18
+
19
+ ### Example:
20
+
21
+ html = <<-HTML
22
+ <html>
23
+ <body>
24
+ <div id="d1" style="color: blue">
25
+ <h1>hello world!</h1>
26
+ </div>
27
+ </body>
28
+ </html>
29
+ HTML
30
+ hashtml = HashTML.new(html)
31
+ hashtml.inspect # => #<HashTML:0x00000001328650 @root_node=#<HashTML::Node:0x000000013283f8 @name="document", @attributes={}, @children=[#<HashTML::Node:0x00000001327ef8 @name="html", @attributes={}, @children=[#<HashTML::Node:0x00000001327a20 @name="body", @attributes={}, @children=[#<HashTML::Text:0x00000001326300 @text="\n ">, #<HashTML::Node:0x00000001326288 @name="div", @attributes={"id"=>"d1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x0000000132c8b8 @text="\n ">, #<HashTML::Node:0x0000000132c728 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x0000000132b4e0 @text="hello world!">]>, #<HashTML::Text:0x0000000132a7c0 @text="\n ">]>, #<HashTML::Text:0x00000001329a50 @text="\n ">, #<HashTML::Node:0x000000013299d8 @name="div", @attributes={"id"=>"d2", "style"=>"color: green"}, @children=[#<HashTML::Text:0x000000013306c0 @text="\n ">, #<HashTML::Node:0x00000001330620 @name="p", @attributes={}, @children=[#<HashTML::Text:0x0000000132ef00 @text="Lorem ipsum dolor sit amet, consectetur adipiscing elit.">]>, #<HashTML::Text:0x0000000132e5c8 @text="\n ">]>, #<HashTML::Text:0x0000000132d6f0 @text="\n ">]>]>]>>
32
+
33
+
34
+ HashTML allows you to convert the object to a Ruby Hash with to_h.
35
+
36
+ ### Example:
37
+
38
+ html = <<-HTML
39
+ <html>
40
+ <body>
41
+ <div id="d1" style="color: blue">
42
+ <h1>hello world!</h1>
43
+ </div>
44
+ </body>
45
+ </html>
46
+ HTML
47
+ hashtml = HashTML.new(html)
48
+ hashtml.to_h # => {"document"=>{:attributes=>{}, :children=>[{"html"=>{:attributes=>{}, :children=>[{"body"=>{:attributes=>{}, :children=>[{:text=>"\n "}, {"div"=>{:attributes=>{"id"=>"d1", "style"=>"color: blue"}, :children=>[{:text=>"\n "}, {"h1"=>{:attributes=>{}, :children=>[{:text=>"hello world!"}]}}, {:text=>"\n "}]}}, {:text=>"\n "}, {"div"=>{:attributes=>{"id"=>"d2", "style"=>"color: green"}, :children=>[{:text=>"\n "}, {"p"=>{:attributes=>{}, :children=>[{:text=>"Lorem ipsum dolor sit amet, consectetur adipiscing elit."}]}}, {:text=>"\n "}]}}, {:text=>"\n "}]}}]}}]}}
49
+
50
+
51
+ You can access elements and change them simply by "navigating" through them.
52
+ And when you're done, simply regenerate your HTML by doing to_html!
53
+
54
+ ### Example:
55
+
56
+ html = <<-HTML
57
+ <html>
58
+ <body>
59
+ <div id="d1" style="color: blue">
60
+ <h1>hello world!</h1>
61
+ </div>
62
+ </body>
63
+ </html>
64
+ HTML
65
+
66
+ hashtml = HashTML.new(html)
67
+ hashtml.document.html.body.div.inspect # => #<HashTML::Node:0x00000000b6c128 @name="div", @attributes={"id"=>"d1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x00000000b72528 @text="\n ">, #<HashTML::Node:0x00000000b72348 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x00000000b71268 @text="hello world!">]>, #<HashTML::Text:0x00000000b704a8 @text="\n ">]>
68
+
69
+ hashtml.document.html.body.div.attributes['id'] = 'new_id1'
70
+ hashtml.document.html.body.div.inspect # => #<HashTML::Node:0x00000000b6c128 @name="div", @attributes={"id"=>"new_id1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x00000000b72528 @text="\n ">, #<HashTML::Node:0x00000000b72348 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x00000000b71268 @text="hello world!">]>, #<HashTML::Text:0x00000000b704a8 @text="\n ">]>
71
+
72
+ hashtml.document.html.body.div.h1.text # => 'hello world!'
73
+ hashtml.document.html.body.div.h1.text = 'such edit! wow'
74
+ hashtml.document.html.body.div.h1.text # => 'such edit! wow'
75
+
76
+ hashtml.to_html # => <document><html><body>
77
+ <div id="new_id1" style="color: blue">
78
+ <h1>such edit! wow</h1>
79
+ </div>
80
+ </body></html></document>
81
+
82
+
83
+ Worried about navigating and having tons of elements with the same tag at the same level?
84
+ That's not a problem! Just identify the node by its attributes!
85
+
86
+ ### Example:
87
+
88
+ html = <<-HTML
89
+ <html>
90
+ <body>
91
+ <div class="main">
92
+ <span id="s1" style="color: blue">
93
+ <h1>hello world!</h1>
94
+ </span>
95
+ <span id="s2" style="color: green">
96
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
97
+ </span>
98
+ </div>
99
+ </body>
100
+ </html>
101
+ HTML
102
+
103
+ hashtml = HashTML.new(html)
104
+ hashtml.document.html.body.div.span({'id' => 's2'}).attributes['id'] = 'new_id2'
105
+ hashtml.document.html.body.div.span({'id' => 's1'}).h1.text = 'such edit! much navigation! wow'
106
+
107
+ hashtml.to_html # => <document><html><body>
108
+ <div class="main">
109
+ <span id="s1" style="color: blue">
110
+ <h1>such edit! much navigation! wow</h1>
111
+ </span>
112
+ <span id="new_id2" style="color: green">
113
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
114
+ </span>
115
+ </div>
116
+ </body></html></document>
117
+
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+ require 'yard'
3
+ require 'rake/testtask'
4
+ task :default => 'hashtml:yard'
5
+
6
+ namespace :hashtml do
7
+ YARD::Rake::YardocTask.new do |t|
8
+ t.files = %w[lib/**/*.rb features/**/*.feature features/**/*.rb - README.md]
9
+ t.options = %w(-M kramdown)
10
+ end
11
+ end
data/hashtml.gemspec CHANGED
@@ -1,17 +1,20 @@
1
- require File.join(File.dirname(__FILE__), 'lib', 'hashtml', 'version')
2
-
3
1
  Gem::Specification.new do |gem|
4
- gem.authors = ["Mauro Rodrigues"]
5
- gem.email = ["maurorodrigues15@gmail.com"]
2
+ gem.authors = ['Mauro Rodrigues']
3
+ gem.email = ['maurorodrigues15@gmail.com']
6
4
  gem.description = %q{HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.}
7
5
  gem.summary = %q{A HTML to Hash to HTML helper.}
8
6
  gem.homepage = 'https://github.com/MRod15/hashtml'
9
7
 
10
8
  gem.files = `git ls-files`.split("\n")
11
- gem.name = "hashtml"
9
+ gem.name = 'hashtml'
12
10
  gem.require_paths = ['lib']
13
- gem.version = HashTML::VERSION
14
- gem.license = "MIT"
11
+ gem.version = '0.0.2'
12
+ gem.license = 'MIT'
13
+
14
+ gem.add_runtime_dependency('nokogiri', '~> 1.5')
15
15
 
16
- gem.add_dependency 'nokogiri', '~> 1.5.5'
17
- end
16
+ gem.add_development_dependency('rake', '~> 10.1')
17
+ gem.add_development_dependency('yard', '~> 0.8')
18
+ gem.add_development_dependency('yard-cucumber', '~> 2.3')
19
+ gem.add_development_dependency('kramdown', '~> 1.3')
20
+ end
data/lib/hashtml.rb CHANGED
@@ -9,29 +9,12 @@ class HashTML
9
9
 
10
10
  attr_reader :root_node
11
11
 
12
- # Returns a Hash corresponding to the data structure of the given HTML,
12
+ # Returns a HashTML object corresponding to the data structure of the given HTML,
13
13
  # which should be a Nokogiri::HTML::Document or anything that responds to to_s
14
14
  # with a string of valid HTML.
15
15
  #@param html [Nokogiri::HTML::Document], or
16
16
  #@param html [String] document to parse
17
17
  #@return [HashTML]
18
- #@example
19
- # html = '<span id="row_29" class="step param">true</span>'
20
- # HashTML.new(xml).to_h
21
- # # => {
22
- # "span" => {
23
- # :children => [
24
- # {
25
- # :text => "true"
26
- # }
27
- # ],
28
- # :attributes => {
29
- # "id" => "row_29",
30
- # "class" => "step param"
31
- # }
32
- # }
33
- # }
34
- #
35
18
  def initialize(html)
36
19
  doc = (html.is_a?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html.to_s))
37
20
  @root_node = HashTML::Node.new(doc)
@@ -40,16 +23,6 @@ class HashTML
40
23
 
41
24
  # Returns an HTML string corresponding to the data structure of the given Hash.
42
25
  #@return [String]
43
- #@example
44
- # hash = { "span" =>
45
- # {
46
- # :children => [ { :text => "true" } ],
47
- # :attributes => { "id" => "row_29", "class" => "step param" }
48
- # }
49
- # }
50
- # HTMLParser.hash_to_html(hash)
51
- # # => "<span id="row_29" class="step param">true</span>"
52
- #
53
26
  def to_html
54
27
  @root_node.to_html
55
28
  end
@@ -77,8 +50,6 @@ class HashTML
77
50
  end
78
51
 
79
52
  def _get_value(key, attributes={})
80
- #$logger.debug("Looking for '#{key}'")
81
- #$logger.debug('It\'s the root node!')
82
53
  return nil unless @root_node.name == key
83
54
  return @root_node unless attributes
84
55
  return ((@root_node.attributes and @root_node.attributes.include_pairs?(attributes)) ? @root_node : nil)
@@ -90,17 +61,25 @@ class HashTML
90
61
  public
91
62
 
92
63
  class << self
64
+ # Converts a Hash to a HashTML object
65
+ #@param hash [Hash]
66
+ #@return [HashTML]
93
67
  def to_hashtml(hash)
94
68
  convert_to_hashtml(hash)
95
69
  end
96
70
 
97
71
  def to_html(hash)
72
+ # Converts a Hash to HTML
73
+ #@param hash [Hash]
74
+ #@return [String] HTML document
98
75
  convert_to_hashtml(hash).to_html
99
76
  end
100
77
 
101
78
  private
79
+ # Converts a Hash to a HashTML object
80
+ #@param hash [Hash]
81
+ #@return [HashTML]
102
82
  def convert_to_hashtml(hash)
103
- #$logger.warn(hash)
104
83
  hashtml = nil
105
84
  hash.each do |key, value|
106
85
  return HashTML::Text.new(value) if key == :text
@@ -109,7 +88,6 @@ class HashTML
109
88
  hashtml.attributes = (value[:attributes] or {})
110
89
  hashtml.children = value[:children].map { |child| convert_to_hashtml(child) }
111
90
  end
112
- #$logger.debug hashtml
113
91
  hashtml
114
92
  end
115
93
  end
@@ -119,11 +97,9 @@ class HashTML
119
97
 
120
98
  def initialize(node=nil)
121
99
  return unless node
122
- #$logger.warn "Analysing node: #{node.name}\n#{node}"
123
100
  @name = node.name
124
101
  @attributes = node.respond_to?(:attributes) ? get_html_node_attributes(node) : {}
125
102
  @children = get_html_node_children(node)
126
- #$logger.debug(@children, 'Children:')
127
103
  end
128
104
 
129
105
  def to_h
@@ -139,15 +115,13 @@ class HashTML
139
115
 
140
116
  def method_missing(method, *args)
141
117
  method = method.to_s
142
- #$logger.debug(method)
143
- #$logger.debug(args)
144
118
  attributes, new_value, _nil = args
145
119
  attributes ||= {}
146
120
  if method.end_with?("?")
147
121
  key = method[0..-2]
148
122
  _check_for_presence(key, attributes)
149
123
  elsif method.end_with?("=")
150
- key = method[0..-2]
124
+ key = method[0..-2]
151
125
  new_value, attributes = attributes, {} if new_value.nil?
152
126
  _change_value(key, attributes, new_value)
153
127
  else
@@ -162,15 +136,11 @@ class HashTML
162
136
  end
163
137
 
164
138
  def _get_value(key, attributes={})
165
- #$logger.debug("Looking for '#{key}'")
166
- #$logger.debug('It\'s a child node!')
167
139
  if key == 'text'
168
- #$logger.debug('Getting node text...')
169
140
  return @children.map { |child| child.text if child.is_a?(HashTML::Text) }.reject(&:nil?).join
170
141
  else
171
142
  @children.each do |child|
172
143
  next if child.is_a?(HashTML::Text)
173
- #$logger.debug child.attributes
174
144
  return child if (child.name == key and child.attributes.include_pairs?(attributes))
175
145
  end
176
146
  end
@@ -178,16 +148,10 @@ class HashTML
178
148
  end
179
149
 
180
150
  def _change_value(key, attributes, new_value)
181
- #$logger.debug("Looking for '#{key}'")
182
- #$logger.debug('It\'s a child node!')
183
151
  if key == 'text'
184
- #$logger.debug("Changing node text to '#{new_value}'...")
185
- #$logger.warn(@children)
186
152
  new_children = @children.select { |child| !child.is_a?(HashTML::Text) }
187
153
  @children = new_children.empty? ? [HashTML::Text.new(new_value)] : [new_children, HashTML::Text.new(new_value)]
188
- #$logger.warn(@children)
189
154
  else
190
- #$logger.debug('Changing node value...')
191
155
  @children.each_with_index do |child, index|
192
156
  next if child.is_a?(HashTML::Text)
193
157
  if child.name == key and child.attributes.include_pairs?(attributes)
@@ -198,10 +162,7 @@ class HashTML
198
162
  end
199
163
 
200
164
  def get_html_node_children(node)
201
- #$logger.debug "Node children:\n#{node.children}"
202
165
  node.children.map do |child|
203
- #$logger.info("Child:\n#{child}\nChild class: '#{child.class}'")
204
- #$logger.info(child)
205
166
  case child.class.to_s
206
167
  when 'Nokogiri::XML::Text', 'Nokogiri::XML::CDATA'
207
168
  HashTML::Text.new(child.to_s)
@@ -214,7 +175,6 @@ class HashTML
214
175
  end
215
176
 
216
177
  def get_html_node_attributes(node)
217
- #$logger.debug "Node attributes: #{node.attributes}"
218
178
  Hash[node.attributes.map { |name, value| [name, value.value] }]
219
179
  end
220
180
 
@@ -1,3 +1,3 @@
1
1
  module HashTML
2
- VERSION = '0.0.1'
3
- end
2
+ VERSION = '0.0.2'
3
+ end
metadata CHANGED
@@ -1,32 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hashtml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease:
4
+ version: 0.0.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Mauro Rodrigues
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-02-26 00:00:00.000000000 Z
11
+ date: 2015-01-28 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
21
- version: 1.5.5
19
+ version: '1.5'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ~>
28
25
  - !ruby/object:Gem::Version
29
- version: 1.5.5
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '0.8'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard-cucumber
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: kramdown
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
30
83
  description: HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.
31
84
  email:
32
85
  - maurorodrigues15@gmail.com
@@ -35,8 +88,11 @@ extensions: []
35
88
  extra_rdoc_files: []
36
89
  files:
37
90
  - .gitignore
91
+ - .travis.yml
92
+ - Gemfile
38
93
  - LICENSE
39
94
  - README.md
95
+ - Rakefile
40
96
  - hashtml.gemspec
41
97
  - lib/hashtml.rb
42
98
  - lib/hashtml/hash.rb
@@ -44,27 +100,26 @@ files:
44
100
  homepage: https://github.com/MRod15/hashtml
45
101
  licenses:
46
102
  - MIT
103
+ metadata: {}
47
104
  post_install_message:
48
105
  rdoc_options: []
49
106
  require_paths:
50
107
  - lib
51
108
  required_ruby_version: !ruby/object:Gem::Requirement
52
- none: false
53
109
  requirements:
54
110
  - - ! '>='
55
111
  - !ruby/object:Gem::Version
56
112
  version: '0'
57
113
  required_rubygems_version: !ruby/object:Gem::Requirement
58
- none: false
59
114
  requirements:
60
115
  - - ! '>='
61
116
  - !ruby/object:Gem::Version
62
117
  version: '0'
63
118
  requirements: []
64
119
  rubyforge_project:
65
- rubygems_version: 1.8.23
120
+ rubygems_version: 2.4.3
66
121
  signing_key:
67
- specification_version: 3
122
+ specification_version: 4
68
123
  summary: A HTML to Hash to HTML helper.
69
124
  test_files: []
70
125
  has_rdoc: