hashtml 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTI0MTEyNDE3MGY3NDg1NWFkMjkyNDdkNjU0YTFiM2MxNTRjN2FmNg==
5
+ data.tar.gz: !binary |-
6
+ YTBmYzVjMjVkOGQ1YWVlMGNmNmFkZDE4NTYzYjIwODk4NTFiYWI2Yg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MWY5NjhjZDE5MTc5MzcyM2E2NzFhM2ZhMGFmNDc0NDgyMjRkZDY3NTAxM2Iy
10
+ NzNkNWRlOTg0Njg0N2YxZmQyY2RmMWE0YmQzYTcyZTVmMzZhNWU5Yjc1YjEw
11
+ MTA3ZTFhODMxZmVmNDE1NDlmM2ZhNzE4ZjAwMzc3NGE1ZTg4YzQ=
12
+ data.tar.gz: !binary |-
13
+ NjI1YTY0ZjNkMDY3YmUzNTNmZWRjYWQ1YTFkNjg4YThjZjA0N2E0MDNhYjUy
14
+ ZGFlZDNjOGEwYTcwMTg2YjE4ZmFkODA5Y2UxYjAyYWI5NjIzOTg2M2VjYjgw
15
+ NjA1NDI2YzRiYTJjNTUxYmY2ZTI1Y2M2MGJiMWVjNmVmZWIzNGU=
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - 2.1.0
6
+ - 2.2.0
7
+ - jruby-19mode
8
+ - ruby-head
9
+ - jruby-head
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'http://rubygems.org'
2
+ gem 'nokogiri'
3
+ gem 'rake'
4
+ gem 'yard'
5
+ gem 'kramdown'
data/README.md CHANGED
@@ -1,4 +1,117 @@
1
- hashtml
1
+ hashtml [ ![Codeship Status for MRod15/hashtml](https://codeship.io/projects/6dd49080-19a4-0132-515f-0a39251edeca/status)](https://codeship.io/projects/34440) [![Build Status](https://travis-ci.org/MRod15/hashtml.svg?branch=master)](https://travis-ci.org/MRod15/hashtml)
2
2
  =======
3
3
 
4
4
  HashTML is a gem for parsing HTML documents to Ruby Hash-like objects
5
+
6
+ ## Installation
7
+
8
+ HashTML is available as a RubyGem:
9
+
10
+ gem install hashtml
11
+
12
+ ## Usage
13
+
14
+ HashTML parses a Nokogiri::HTML::Document or anything that responds
15
+ to to_s with a string of valid HTML.
16
+ A HashTML object corresponding to the data structure of the given HTML
17
+ is generated.
18
+
19
+ ### Example:
20
+
21
+ html = <<-HTML
22
+ <html>
23
+ <body>
24
+ <div id="d1" style="color: blue">
25
+ <h1>hello world!</h1>
26
+ </div>
27
+ </body>
28
+ </html>
29
+ HTML
30
+ hashtml = HashTML.new(html)
31
+ hashtml.inspect # => #<HashTML:0x00000001328650 @root_node=#<HashTML::Node:0x000000013283f8 @name="document", @attributes={}, @children=[#<HashTML::Node:0x00000001327ef8 @name="html", @attributes={}, @children=[#<HashTML::Node:0x00000001327a20 @name="body", @attributes={}, @children=[#<HashTML::Text:0x00000001326300 @text="\n ">, #<HashTML::Node:0x00000001326288 @name="div", @attributes={"id"=>"d1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x0000000132c8b8 @text="\n ">, #<HashTML::Node:0x0000000132c728 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x0000000132b4e0 @text="hello world!">]>, #<HashTML::Text:0x0000000132a7c0 @text="\n ">]>, #<HashTML::Text:0x00000001329a50 @text="\n ">, #<HashTML::Node:0x000000013299d8 @name="div", @attributes={"id"=>"d2", "style"=>"color: green"}, @children=[#<HashTML::Text:0x000000013306c0 @text="\n ">, #<HashTML::Node:0x00000001330620 @name="p", @attributes={}, @children=[#<HashTML::Text:0x0000000132ef00 @text="Lorem ipsum dolor sit amet, consectetur adipiscing elit.">]>, #<HashTML::Text:0x0000000132e5c8 @text="\n ">]>, #<HashTML::Text:0x0000000132d6f0 @text="\n ">]>]>]>>
32
+
33
+
34
+ HashTML allows you to convert the object to a Ruby Hash with to_h.
35
+
36
+ ### Example:
37
+
38
+ html = <<-HTML
39
+ <html>
40
+ <body>
41
+ <div id="d1" style="color: blue">
42
+ <h1>hello world!</h1>
43
+ </div>
44
+ </body>
45
+ </html>
46
+ HTML
47
+ hashtml = HashTML.new(html)
48
+ hashtml.to_h # => {"document"=>{:attributes=>{}, :children=>[{"html"=>{:attributes=>{}, :children=>[{"body"=>{:attributes=>{}, :children=>[{:text=>"\n "}, {"div"=>{:attributes=>{"id"=>"d1", "style"=>"color: blue"}, :children=>[{:text=>"\n "}, {"h1"=>{:attributes=>{}, :children=>[{:text=>"hello world!"}]}}, {:text=>"\n "}]}}, {:text=>"\n "}, {"div"=>{:attributes=>{"id"=>"d2", "style"=>"color: green"}, :children=>[{:text=>"\n "}, {"p"=>{:attributes=>{}, :children=>[{:text=>"Lorem ipsum dolor sit amet, consectetur adipiscing elit."}]}}, {:text=>"\n "}]}}, {:text=>"\n "}]}}]}}]}}
49
+
50
+
51
+ You can access elements and change them simply by "navigating" through them.
52
+ And when you're done, simply regenerate your HTML by doing to_html!
53
+
54
+ ### Example:
55
+
56
+ html = <<-HTML
57
+ <html>
58
+ <body>
59
+ <div id="d1" style="color: blue">
60
+ <h1>hello world!</h1>
61
+ </div>
62
+ </body>
63
+ </html>
64
+ HTML
65
+
66
+ hashtml = HashTML.new(html)
67
+ hashtml.document.html.body.div.inspect # => #<HashTML::Node:0x00000000b6c128 @name="div", @attributes={"id"=>"d1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x00000000b72528 @text="\n ">, #<HashTML::Node:0x00000000b72348 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x00000000b71268 @text="hello world!">]>, #<HashTML::Text:0x00000000b704a8 @text="\n ">]>
68
+
69
+ hashtml.document.html.body.div.attributes['id'] = 'new_id1'
70
+ hashtml.document.html.body.div.inspect # => #<HashTML::Node:0x00000000b6c128 @name="div", @attributes={"id"=>"new_id1", "style"=>"color: blue"}, @children=[#<HashTML::Text:0x00000000b72528 @text="\n ">, #<HashTML::Node:0x00000000b72348 @name="h1", @attributes={}, @children=[#<HashTML::Text:0x00000000b71268 @text="hello world!">]>, #<HashTML::Text:0x00000000b704a8 @text="\n ">]>
71
+
72
+ hashtml.document.html.body.div.h1.text # => 'hello world!'
73
+ hashtml.document.html.body.div.h1.text = 'such edit! wow'
74
+ hashtml.document.html.body.div.h1.text # => 'such edit! wow'
75
+
76
+ hashtml.to_html # => <document><html><body>
77
+ <div id="new_id1" style="color: blue">
78
+ <h1>such edit! wow</h1>
79
+ </div>
80
+ </body></html></document>
81
+
82
+
83
+ Worried about navigating and having tons of elements with the same tag at the same level?
84
+ That's not a problem! Just identify the node by its attributes!
85
+
86
+ ### Example:
87
+
88
+ html = <<-HTML
89
+ <html>
90
+ <body>
91
+ <div class="main">
92
+ <span id="s1" style="color: blue">
93
+ <h1>hello world!</h1>
94
+ </span>
95
+ <span id="s2" style="color: green">
96
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
97
+ </span>
98
+ </div>
99
+ </body>
100
+ </html>
101
+ HTML
102
+
103
+ hashtml = HashTML.new(html)
104
+ hashtml.document.html.body.div.span({'id' => 's2'}).attributes['id'] = 'new_id2'
105
+ hashtml.document.html.body.div.span({'id' => 's1'}).h1.text = 'such edit! much navigation! wow'
106
+
107
+ hashtml.to_html # => <document><html><body>
108
+ <div class="main">
109
+ <span id="s1" style="color: blue">
110
+ <h1>such edit! much navigation! wow</h1>
111
+ </span>
112
+ <span id="new_id2" style="color: green">
113
+ <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
114
+ </span>
115
+ </div>
116
+ </body></html></document>
117
+
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+ require 'yard'
3
+ require 'rake/testtask'
4
+ task :default => 'hashtml:yard'
5
+
6
+ namespace :hashtml do
7
+ YARD::Rake::YardocTask.new do |t|
8
+ t.files = %w[lib/**/*.rb features/**/*.feature features/**/*.rb - README.md]
9
+ t.options = %w(-M kramdown)
10
+ end
11
+ end
data/hashtml.gemspec CHANGED
@@ -1,17 +1,20 @@
1
- require File.join(File.dirname(__FILE__), 'lib', 'hashtml', 'version')
2
-
3
1
  Gem::Specification.new do |gem|
4
- gem.authors = ["Mauro Rodrigues"]
5
- gem.email = ["maurorodrigues15@gmail.com"]
2
+ gem.authors = ['Mauro Rodrigues']
3
+ gem.email = ['maurorodrigues15@gmail.com']
6
4
  gem.description = %q{HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.}
7
5
  gem.summary = %q{A HTML to Hash to HTML helper.}
8
6
  gem.homepage = 'https://github.com/MRod15/hashtml'
9
7
 
10
8
  gem.files = `git ls-files`.split("\n")
11
- gem.name = "hashtml"
9
+ gem.name = 'hashtml'
12
10
  gem.require_paths = ['lib']
13
- gem.version = HashTML::VERSION
14
- gem.license = "MIT"
11
+ gem.version = '0.0.2'
12
+ gem.license = 'MIT'
13
+
14
+ gem.add_runtime_dependency('nokogiri', '~> 1.5')
15
15
 
16
- gem.add_dependency 'nokogiri', '~> 1.5.5'
17
- end
16
+ gem.add_development_dependency('rake', '~> 10.1')
17
+ gem.add_development_dependency('yard', '~> 0.8')
18
+ gem.add_development_dependency('yard-cucumber', '~> 2.3')
19
+ gem.add_development_dependency('kramdown', '~> 1.3')
20
+ end
data/lib/hashtml.rb CHANGED
@@ -9,29 +9,12 @@ class HashTML
9
9
 
10
10
  attr_reader :root_node
11
11
 
12
- # Returns a Hash corresponding to the data structure of the given HTML,
12
+ # Returns a HashTML object corresponding to the data structure of the given HTML,
13
13
  # which should be a Nokogiri::HTML::Document or anything that responds to to_s
14
14
  # with a string of valid HTML.
15
15
  #@param html [Nokogiri::HTML::Document], or
16
16
  #@param html [String] document to parse
17
17
  #@return [HashTML]
18
- #@example
19
- # html = '<span id="row_29" class="step param">true</span>'
20
- # HashTML.new(xml).to_h
21
- # # => {
22
- # "span" => {
23
- # :children => [
24
- # {
25
- # :text => "true"
26
- # }
27
- # ],
28
- # :attributes => {
29
- # "id" => "row_29",
30
- # "class" => "step param"
31
- # }
32
- # }
33
- # }
34
- #
35
18
  def initialize(html)
36
19
  doc = (html.is_a?(Nokogiri::HTML::Document) ? html : Nokogiri::HTML(html.to_s))
37
20
  @root_node = HashTML::Node.new(doc)
@@ -40,16 +23,6 @@ class HashTML
40
23
 
41
24
  # Returns an HTML string corresponding to the data structure of the given Hash.
42
25
  #@return [String]
43
- #@example
44
- # hash = { "span" =>
45
- # {
46
- # :children => [ { :text => "true" } ],
47
- # :attributes => { "id" => "row_29", "class" => "step param" }
48
- # }
49
- # }
50
- # HTMLParser.hash_to_html(hash)
51
- # # => "<span id="row_29" class="step param">true</span>"
52
- #
53
26
  def to_html
54
27
  @root_node.to_html
55
28
  end
@@ -77,8 +50,6 @@ class HashTML
77
50
  end
78
51
 
79
52
  def _get_value(key, attributes={})
80
- #$logger.debug("Looking for '#{key}'")
81
- #$logger.debug('It\'s the root node!')
82
53
  return nil unless @root_node.name == key
83
54
  return @root_node unless attributes
84
55
  return ((@root_node.attributes and @root_node.attributes.include_pairs?(attributes)) ? @root_node : nil)
@@ -90,17 +61,25 @@ class HashTML
90
61
  public
91
62
 
92
63
  class << self
64
+ # Converts a Hash to a HashTML object
65
+ #@param hash [Hash]
66
+ #@return [HashTML]
93
67
  def to_hashtml(hash)
94
68
  convert_to_hashtml(hash)
95
69
  end
96
70
 
97
71
  def to_html(hash)
72
+ # Converts a Hash to HTML
73
+ #@param hash [Hash]
74
+ #@return [String] HTML document
98
75
  convert_to_hashtml(hash).to_html
99
76
  end
100
77
 
101
78
  private
79
+ # Converts a Hash to a HashTML object
80
+ #@param hash [Hash]
81
+ #@return [HashTML]
102
82
  def convert_to_hashtml(hash)
103
- #$logger.warn(hash)
104
83
  hashtml = nil
105
84
  hash.each do |key, value|
106
85
  return HashTML::Text.new(value) if key == :text
@@ -109,7 +88,6 @@ class HashTML
109
88
  hashtml.attributes = (value[:attributes] or {})
110
89
  hashtml.children = value[:children].map { |child| convert_to_hashtml(child) }
111
90
  end
112
- #$logger.debug hashtml
113
91
  hashtml
114
92
  end
115
93
  end
@@ -119,11 +97,9 @@ class HashTML
119
97
 
120
98
  def initialize(node=nil)
121
99
  return unless node
122
- #$logger.warn "Analysing node: #{node.name}\n#{node}"
123
100
  @name = node.name
124
101
  @attributes = node.respond_to?(:attributes) ? get_html_node_attributes(node) : {}
125
102
  @children = get_html_node_children(node)
126
- #$logger.debug(@children, 'Children:')
127
103
  end
128
104
 
129
105
  def to_h
@@ -139,15 +115,13 @@ class HashTML
139
115
 
140
116
  def method_missing(method, *args)
141
117
  method = method.to_s
142
- #$logger.debug(method)
143
- #$logger.debug(args)
144
118
  attributes, new_value, _nil = args
145
119
  attributes ||= {}
146
120
  if method.end_with?("?")
147
121
  key = method[0..-2]
148
122
  _check_for_presence(key, attributes)
149
123
  elsif method.end_with?("=")
150
- key = method[0..-2]
124
+ key = method[0..-2]
151
125
  new_value, attributes = attributes, {} if new_value.nil?
152
126
  _change_value(key, attributes, new_value)
153
127
  else
@@ -162,15 +136,11 @@ class HashTML
162
136
  end
163
137
 
164
138
  def _get_value(key, attributes={})
165
- #$logger.debug("Looking for '#{key}'")
166
- #$logger.debug('It\'s a child node!')
167
139
  if key == 'text'
168
- #$logger.debug('Getting node text...')
169
140
  return @children.map { |child| child.text if child.is_a?(HashTML::Text) }.reject(&:nil?).join
170
141
  else
171
142
  @children.each do |child|
172
143
  next if child.is_a?(HashTML::Text)
173
- #$logger.debug child.attributes
174
144
  return child if (child.name == key and child.attributes.include_pairs?(attributes))
175
145
  end
176
146
  end
@@ -178,16 +148,10 @@ class HashTML
178
148
  end
179
149
 
180
150
  def _change_value(key, attributes, new_value)
181
- #$logger.debug("Looking for '#{key}'")
182
- #$logger.debug('It\'s a child node!')
183
151
  if key == 'text'
184
- #$logger.debug("Changing node text to '#{new_value}'...")
185
- #$logger.warn(@children)
186
152
  new_children = @children.select { |child| !child.is_a?(HashTML::Text) }
187
153
  @children = new_children.empty? ? [HashTML::Text.new(new_value)] : [new_children, HashTML::Text.new(new_value)]
188
- #$logger.warn(@children)
189
154
  else
190
- #$logger.debug('Changing node value...')
191
155
  @children.each_with_index do |child, index|
192
156
  next if child.is_a?(HashTML::Text)
193
157
  if child.name == key and child.attributes.include_pairs?(attributes)
@@ -198,10 +162,7 @@ class HashTML
198
162
  end
199
163
 
200
164
  def get_html_node_children(node)
201
- #$logger.debug "Node children:\n#{node.children}"
202
165
  node.children.map do |child|
203
- #$logger.info("Child:\n#{child}\nChild class: '#{child.class}'")
204
- #$logger.info(child)
205
166
  case child.class.to_s
206
167
  when 'Nokogiri::XML::Text', 'Nokogiri::XML::CDATA'
207
168
  HashTML::Text.new(child.to_s)
@@ -214,7 +175,6 @@ class HashTML
214
175
  end
215
176
 
216
177
  def get_html_node_attributes(node)
217
- #$logger.debug "Node attributes: #{node.attributes}"
218
178
  Hash[node.attributes.map { |name, value| [name, value.value] }]
219
179
  end
220
180
 
@@ -1,3 +1,3 @@
1
1
  module HashTML
2
- VERSION = '0.0.1'
3
- end
2
+ VERSION = '0.0.2'
3
+ end
metadata CHANGED
@@ -1,32 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hashtml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease:
4
+ version: 0.0.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Mauro Rodrigues
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-02-26 00:00:00.000000000 Z
11
+ date: 2015-01-28 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
21
- version: 1.5.5
19
+ version: '1.5'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ~>
28
25
  - !ruby/object:Gem::Version
29
- version: 1.5.5
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '0.8'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard-cucumber
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: kramdown
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
30
83
  description: HashTML is a gem for parsing HTML documents to Ruby Hash-like objects.
31
84
  email:
32
85
  - maurorodrigues15@gmail.com
@@ -35,8 +88,11 @@ extensions: []
35
88
  extra_rdoc_files: []
36
89
  files:
37
90
  - .gitignore
91
+ - .travis.yml
92
+ - Gemfile
38
93
  - LICENSE
39
94
  - README.md
95
+ - Rakefile
40
96
  - hashtml.gemspec
41
97
  - lib/hashtml.rb
42
98
  - lib/hashtml/hash.rb
@@ -44,27 +100,26 @@ files:
44
100
  homepage: https://github.com/MRod15/hashtml
45
101
  licenses:
46
102
  - MIT
103
+ metadata: {}
47
104
  post_install_message:
48
105
  rdoc_options: []
49
106
  require_paths:
50
107
  - lib
51
108
  required_ruby_version: !ruby/object:Gem::Requirement
52
- none: false
53
109
  requirements:
54
110
  - - ! '>='
55
111
  - !ruby/object:Gem::Version
56
112
  version: '0'
57
113
  required_rubygems_version: !ruby/object:Gem::Requirement
58
- none: false
59
114
  requirements:
60
115
  - - ! '>='
61
116
  - !ruby/object:Gem::Version
62
117
  version: '0'
63
118
  requirements: []
64
119
  rubyforge_project:
65
- rubygems_version: 1.8.23
120
+ rubygems_version: 2.4.3
66
121
  signing_key:
67
- specification_version: 3
122
+ specification_version: 4
68
123
  summary: A HTML to Hash to HTML helper.
69
124
  test_files: []
70
125
  has_rdoc: