domparser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +8 -0
- data/Guardfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +88 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/domparser.gemspec +33 -0
- data/img/Screenshot from 2016-08-07 14-49-36.png +0 -0
- data/img/Screenshot from 2016-08-07 14-58-06.png +0 -0
- data/lib/domparser.rb +25 -0
- data/lib/domparser/node_renderer.rb +77 -0
- data/lib/domparser/parser_script.rb +217 -0
- data/lib/domparser/test.html +44 -0
- data/lib/domparser/test2.html +415 -0
- data/lib/domparser/test3.html +10 -0
- data/lib/domparser/tree_searcher.rb +58 -0
- data/lib/domparser/version.rb +3 -0
- metadata +110 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c301c3ce2735f27e8fb1a3da33dd9572bcd3ed2b
|
4
|
+
data.tar.gz: c3b84e0333afac72879262a7ec99351c2f473f52
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 422843131bbe555802c12d6817c9ee20fa27abc3df13126c6c1e645747533a005b23c4896152f21c116484052dbd25e94575d79ac97e0246f559c0f5a53f8e64
|
7
|
+
data.tar.gz: a8f563f1fb79e3bc4b3eb08c09d6f4ffbd778c9436890d697c5459042d13cbb63019e53768ffa9689e702c01429d7be0e681b865611033bec9fb3c84c570e606
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing other's private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at lby89757@hotmail.com. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 BranLiang
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Domparser
|
2
|
+
|
3
|
+
## Beta version
|
4
|
+
|
5
|
+
A simple DOM parser that takes a raw HTML file as input. The contents of the HTML file are parsed automatically and stored in a tree structure. Besides transforming the HTML, it also supports simple searches by attribute. The data structure is reversible as well, using the built-in rebuild function.
|
6
|
+
|
7
|
+
For example, for the very simple html code below
|
8
|
+
|
9
|
+
```html
|
10
|
+
<div>
|
11
|
+
div text before
|
12
|
+
<p>
|
13
|
+
p text
|
14
|
+
</p>
|
15
|
+
<div>
|
16
|
+
more div text
|
17
|
+
</div>
|
18
|
+
div text after
|
19
|
+
</div>
|
20
|
+
```
|
21
|
+
|
22
|
+
After using the gem, you will get a new data structure similar to the one below.
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
<struct Node tag="DOCUMENT", offset=nil, type="general", depth=0, attributes={},
|
26
|
+
children=[<struct Node tag="<div>", offset=0, type=:div, depth=2, attributes={},
|
27
|
+
children=[<struct Node tag="div text before", offset=nil ......
|
28
|
+
....
|
29
|
+
....
|
30
|
+
```
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
gem 'domparser'
|
36
|
+
```
|
37
|
+
|
38
|
+
And then execute:
|
39
|
+
|
40
|
+
$ bundle
|
41
|
+
|
42
|
+
Or install it yourself as:
|
43
|
+
|
44
|
+
$ gem install domparser
|
45
|
+
|
46
|
+
## Usage
|
47
|
+
After installation.
|
48
|
+
|
49
|
+
In the command shell.
|
50
|
+
|
51
|
+
```
|
52
|
+
require "domparser"
|
53
|
+
```
|
54
|
+
|
55
|
+
Then locate your HTML file and pass its path to the following command:
|
56
|
+
|
57
|
+
```
|
58
|
+
Domparser.parser "desktop/test/index.html"
|
59
|
+
```
|
60
|
+
|
61
|
+
The previous command returns the new data structure, which looks like the following.
|
62
|
+
![data_structure](https://github.com/BranLiang/domparser/blob/master/img/Screenshot%20from%202016-08-07%2014-49-36.png)
|
63
|
+
|
64
|
+
You can also search for a particular attribute; here `data` is the data tree you generated. An example is as follows.
|
65
|
+
|
66
|
+
```
|
67
|
+
Domparser.search data, :class, 'container'
|
68
|
+
```
|
69
|
+
|
70
|
+
If you want to rebuild the HTML file, just use the rebuild function as follows.
|
71
|
+
|
72
|
+
```
|
73
|
+
Domparser.rebuild data
|
74
|
+
```
|
75
|
+
|
76
|
+
After that, you will get an HTML-like structure as follows.
|
77
|
+
![rebuild](https://github.com/BranLiang/domparser/blob/master/img/Screenshot%20from%202016-08-07%2014-58-06.png)
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
## Contributing
|
82
|
+
|
83
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/lby89757/domparser. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
84
|
+
|
85
|
+
|
86
|
+
## License
|
87
|
+
|
88
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console for the gem: sets up the Bundler load path, requires
# domparser, and then starts an interactive IRB session.
require "bundler/setup"
require "domparser"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

# IRB is required last so that everything loaded above is available in the session.
require "irb"
IRB.start
|
data/bin/setup
ADDED
data/domparser.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for domparser.
# Puts the gem's lib/ directory on the load path so the version constant can
# be required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'domparser/version'

Gem::Specification.new do |spec|
  spec.name          = "domparser"
  spec.version       = Domparser::VERSION
  spec.authors       = ["BranLiang"]
  spec.email         = ["lby89757@hotmail.com"]

  spec.summary       = %q{Simple DOM parser which translates HTML into a tree data structure.}
  spec.description   = %q{This gem has three main functions: read HTML, search data, and rebuild HTML.}
  spec.homepage      = "http://liangboyuan.pub"
  spec.license       = "MIT"

  # Restrict `gem push` to a single host (here RubyGems.org) through the
  # 'allowed_push_host' metadata key. Delete this section to allow pushing to
  # any host. The metadata accessor is checked first; when it is missing the
  # build aborts rather than producing a gem without the restriction.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = 'https://rubygems.org'
  else
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Executables are whatever tracked files live under exe/ (base names only).
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
Binary file
|
Binary file
|
data/lib/domparser.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require "domparser/version"
|
2
|
+
require "domparser/node_renderer"
|
3
|
+
require "domparser/parser_script"
|
4
|
+
require "domparser/tree_searcher"
|
5
|
+
|
6
|
+
# Public entry points of the gem: parse an HTML file into a node tree, print a
# tree back out as markup, and search a tree by attribute.
module Domparser
  # Parses the HTML file at +file_path+ with a fresh DOMReader, prints the
  # NodeRenderer summary for the resulting tree, and returns the tree.
  def self.parser file_path
    tree = DOMReader.new.parser_script(file_path)
    NodeRenderer.new(tree).render
    tree
  end

  # Prints +data+ (a tree returned by .parser) as indented markup using
  # DOMReader#simple_print_parser.
  def self.rebuild data
    DOMReader.new.simple_print_parser(data)
  end

  # Delegates to TreeSearcher#search_descendents, starting from +data+, with
  # the given attribute name and value; returns whatever that call returns.
  def self.search data, attr_name, attr_value
    TreeSearcher.new(data).search_descendents(data, attr_name, attr_value)
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Prints summary statistics for a node tree: how many nodes there are of each
# type, followed by a count for every distinct (type, attributes) combination.
# Nodes only need to respond to #type, #attributes and #children.
class NodeRenderer
  def initialize tree
    @tree = tree
    @type_hash = nil
    @general_hash = nil
  end

  # Renders the statistics for +node+, or for the whole tree when no node is given.
  def render node = nil
    node.nil? ? print_all : print_node(node)
  end

  # Resets both counters, walks the subtree under +node+ to fill them, then
  # prints the per-type table followed by the detailed breakdown.
  def print_node node
    @general_hash = Hash.new(0)
    @type_hash = Hash.new(0)
    detail_print_all node
    general_print_all node
    print_general_hash @general_hash
    detail_print_hash @type_hash
  end

  # Same as #print_node, applied to the tree given to the constructor.
  def print_all
    print_node @tree
  end

  # Recursively counts nodes per type (nodes whose type is nil are not counted).
  def general_print_all data
    kind = data.type
    @general_hash[kind] += 1 unless kind.nil?
    data.children.each { |child| general_print_all child } unless data.children.empty?
  end

  # Prints one "|type : count|" row per entry, then a line of 100 asterisks.
  def print_general_hash hash
    hash.each do |type, value|
      puts "|#{type.to_s.center(8)} : #{value.to_s.center(4)}|"
    end
    puts "*" * 100
  end

  # Prints every counted { type => attributes } combination with its count,
  # listing the attributes when there are any, each followed by a dashed rule.
  def detail_print_hash hash
    hash.each do |signature, type_count|
      signature.each do |type, attributes|
        if attributes.empty?
          puts "#{type_count} #{type}"
        else
          puts "#{type_count} #{type} with following attributes:"
          attributes.each { |attr_name, attr_value| puts " #{attr_name} : #{attr_value}" }
        end
        puts "-" * 100
      end
    end
  end

  # Recursively counts nodes per { type => attributes } combination; the
  # single-pair hash itself is used as the counter key.
  def detail_print_all data
    @type_hash[{ data.type => data.attributes }] += 1 unless data.type.nil?
    data.children.each { |child| detail_print_all child } unless data.children.empty?
  end
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
# A single node of the parsed tree.
#   tag        - the raw tag string ("<div class='x'>", "</div>") or, for text nodes, the text
#   offset     - position of the tag relative to the parser's index when it was found
#   type       - element name as a Symbol, or a String marker such as 'text'
#   depth      - indentation depth used when printing
#   attributes - Hash of attributes (defaults to a new empty Hash)
#   children   - Array of child nodes (defaults to a new empty Array)
#   parent     - the parent node
Node = Struct.new :tag, :offset, :type, :depth, :attributes, :children, :parent do
  def initialize(*)
    super
    # Only fill in defaults when the caller did not supply a value, so that
    # explicitly passed attributes/children are kept. Each node gets its own
    # fresh containers.
    self.attributes ||= {}
    self.children ||= []
  end
end
|
8
|
+
|
9
|
+
# Reads an HTML file and turns it into a tree of Node structs hanging off a
# synthetic DOCUMENT root, and can print such a tree back out as indented markup.
class DOMReader
  attr_accessor :root, :tag_count

  TAG_COMMON = /<[\/]*(\w+)[^>]*>/ # Matches both opening and closing tags; <!doctype> is excluded here.
  TAG_OPEN = /<(\w+)[^>]*>/
  TAG_CLOSE = /<\/(\w+)[^>]*>/
  TAG_ATTR = /(\S+)=["']?((?:.(?!["']?\s+(?:\S+)=|[>"']))+.)["']?/
  TAG_TEXT = />([^<>]*[\w\d]+[^<>]*)</
  # Tags that never get a closing tag. The look-ahead requires the tag name to
  # end right after the listed word, so "<colgroup>" is not mistaken for "<col>".
  TAG_SPECIAL = /(<(?:img|hr|area|base|br|col|embed|input|link|meta|source|param|command|track|keygen|wbr)(?=[\s\/>])[^>]*>)/

  # @stack is only used to store open tags, because only open tags can have children
  # @stack will be initialized with the DOCUMENT node
  # @html is used to store the html
  # @index is used to store the position reached in @html
  # @root is the tag tree
  # @tag_count counts the open tags + special tags + text nodes
  def initialize
    @stack = []
    @html = nil
    @index = 0
    @root = Node.new('DOCUMENT', nil, 'general', 0)
    @tag_count = 0
  end

  # Parses the file at +file_path+ and returns the DOCUMENT root node.
  # Tags are consumed one at a time and handled according to their kind
  # (special, open or close) until the input contains no further tag, so
  # unbalanced or empty input ends the parse instead of raising.
  def parser_script file_path
    read_file file_path
    @stack << @root # initialize the @stack
    while (cur_node = get_new_tag(@html, @index))
      processing cur_node
    end
    @root
  end

  # Recursively prints the tree under +data+, one node per line, each child
  # indented by its depth.
  def simple_print_parser data
    puts markup_for(data)
    return if data.children.empty?
    data.children.each do |child|
      print " " * child.depth
      simple_print_parser child
    end
  end

  private

  # Builds the line printed for a single node: closing tags (type nil) and
  # text nodes print their stored tag/text, every other node is rebuilt from
  # its type and attributes.
  def markup_for data
    return data.tag if data.type.nil? || data.type == 'text'
    return "<#{data.type}>" if data.attributes.empty?

    pairs = data.attributes.map do |key, value|
      # :class is stored as an array of class names; everything else is a string
      rendered = key == :class ? value.join(" ") : value
      "#{key}='#{rendered}'"
    end
    "<#{data.type} #{pairs.join(' ')}>"
  end

  # Finds the next tag at or after +index+ and wraps it in a new Node holding
  # the tag string and its offset relative to +index+.
  # Returns nil when no further tag exists.
  def get_new_tag html_string, index
    remaining = html_string[index..-1]
    return nil if remaining.nil?
    found = remaining.match(TAG_COMMON)
    return nil if found.nil?
    Node.new(found[0], found.begin(0))
  end

  # Reads the file and strips out every "\n"
  def read_file file_path
    @html = File.read(file_path).gsub("\n", "")
  end

  # Process the special tag, open tag and close tag separately
  def processing node
    if special? node
      process_special node
    elsif opentag? node
      process_opentag node
    else
      process_closetag node
    end
  end

  # For a special tag: add the text preceding it, set up the parent-child
  # relationship, then record its type and attributes
  def process_special node
    add_text node
    setup_relation node
    add_tag_type node
    add_attributes node
    increment_index node
    @tag_count += 1
    increment_depth node
  end

  # For an open tag: same as a special tag, and additionally push it onto the
  # stack so that following nodes become its children
  def process_opentag node
    process_special node
    add_to_stack node
  end

  # A close tag is taken to match the last element on the stack, so that
  # element is popped and the close tag is attached to the new last element.
  # The text must be added before the pop so it is attached to the open tag
  # being closed. The DOCUMENT root is never popped, so a stray closing tag
  # is simply attached to the root.
  def process_closetag node
    add_text node
    @stack.pop if @stack.length > 1
    setup_relation node
    increment_index node
    increment_depth node
  end

  # Adds a text node for any text found between the previous tag's ">" and
  # the "<" of +node+.
  def add_text node
    window_start = @index - 1
    # At the very start of the document there is no previous ">" to look back at
    return if window_start < 0
    text_match = @html[window_start..(@index + node.offset + 1)].match(TAG_TEXT)
    return if text_match.nil?

    t_node = Node.new(text_match[1].strip, nil, 'text')
    setup_relation t_node
    increment_depth t_node
    @tag_count += 1
  end

  # Scans the tag for every name=value pair (scan rather than match, to get them all)
  def add_attributes node
    node.tag.scan(TAG_ATTR).each do |attribute|
      attribute[0] == "class" ? set_class_attr(attribute, node) : set_normal_attr(attribute, node)
    end
  end

  # The class attribute is stored under :class as an array of class names
  def set_class_attr attribute, node
    node.attributes[:class] = attribute[1].split(" ").map(&:strip)
  end

  # Any other attribute is stored as a string under its name as a symbol
  def set_normal_attr attribute, node
    node.attributes[attribute[0].to_sym] = attribute[1]
  end

  # Small helper methods
  def increment_depth node
    node.depth = node.parent.depth + 2
  end

  def setup_relation node
    @stack.last.children << node
    node.parent = @stack.last
  end

  def add_tag_type node
    node.type = node.tag.match(TAG_OPEN)[1].to_sym
  end

  def add_to_stack node
    @stack << node
  end

  # Moves the index to just past the end of the tag
  def increment_index node
    @index += node.tag.length + node.offset
  end

  def opentag? node
    !!node.tag.match(TAG_OPEN)
  end

  def special? node
    !!node.tag.match(TAG_SPECIAL)
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
<!doctype html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>
|
5
|
+
This is a test page
|
6
|
+
</title>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<div class="top-div test test2">
|
10
|
+
I'm an outer div!!!
|
11
|
+
<div class="inner-div">
|
12
|
+
I'm an inner div!!! I might just <em>emphasize</em> some text.
|
13
|
+
</div>
|
14
|
+
I am EVEN MORE TEXT for the SAME div!!!
|
15
|
+
</div>
|
16
|
+
<main id="main-area">
|
17
|
+
<header class="super-header">
|
18
|
+
<h1 class="emphasized">
|
19
|
+
Welcome to the test doc!
|
20
|
+
</h1>
|
21
|
+
<h2>
|
22
|
+
This document contains data
|
23
|
+
</h2>
|
24
|
+
</header>
|
25
|
+
<ul id="main-area" class="test">
|
26
|
+
Here is the data:
|
27
|
+
<img src="www.hello.com">
|
28
|
+
<li>Four list items</li>
|
29
|
+
<li class="bold funky important">One unordered list</li>
|
30
|
+
<li>One h1</li>
|
31
|
+
<li>One h2</li>
|
32
|
+
<li>One header</li>
|
33
|
+
<li>One main</li>
|
34
|
+
<li>One body</li>
|
35
|
+
<li>One html</li>
|
36
|
+
<li>One title</li>
|
37
|
+
<li>One head</li>
|
38
|
+
<li>One doctype</li>
|
39
|
+
<li>Two divs</li>
|
40
|
+
<li>And infinite fun!</li>
|
41
|
+
</ul>
|
42
|
+
</main>
|
43
|
+
</body>
|
44
|
+
</html>
|
@@ -0,0 +1,415 @@
|
|
1
|
+
<!doctype html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>HTML5 Test Page</title>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<div id="top" class="page" role="document">
|
10
|
+
<header role="banner">
|
11
|
+
<h1>HTML5 Test Page</h1>
|
12
|
+
<p>This is a test page filled with common HTML elements to be used to provide visual feedback whilst building CSS systems and frameworks.</p>
|
13
|
+
</header>
|
14
|
+
<nav role="navigation">
|
15
|
+
<ul>
|
16
|
+
<li>
|
17
|
+
<a href="#text">Text</a>
|
18
|
+
<ul>
|
19
|
+
<li><a href="#text__headings">Headings</a></li>
|
20
|
+
<li><a href="#text__paragraphs">Paragraphs</a></li>
|
21
|
+
<li><a href="#text__blockquotes">Blockquotes</a></li>
|
22
|
+
<li><a href="#text__lists">Lists</a></li>
|
23
|
+
<li><a href="#text__hr">Horizontal rules</a></li>
|
24
|
+
<li><a href="#text__tables">Tabular data</a></li>
|
25
|
+
<li><a href="#text__code">Code</a></li>
|
26
|
+
<li><a href="#text__inline">Inline elements</a></li>
|
27
|
+
</ul>
|
28
|
+
</li>
|
29
|
+
<li>
|
30
|
+
<a href="#embedded">Embedded content</a>
|
31
|
+
<ul>
|
32
|
+
<li><a href="#embedded__images">Images</a></li>
|
33
|
+
<li><a href="#embedded__audio">Audio</a></li>
|
34
|
+
<li><a href="#embedded__video">Video</a></li>
|
35
|
+
<li><a href="#embedded__canvas">Canvas</a></li>
|
36
|
+
<li><a href="#embedded__meter">Meter</a></li>
|
37
|
+
<li><a href="#embedded__progress">Progress</a></li>
|
38
|
+
<li><a href="#embedded__svg">Inline SVG</a></li>
|
39
|
+
<li><a href="#embedded__iframe">IFrames</a></li>
|
40
|
+
</ul>
|
41
|
+
</li>
|
42
|
+
<li>
|
43
|
+
<a href="#forms">Form elements</a>
|
44
|
+
<ul>
|
45
|
+
<li><a href="#forms__input">Input fields</a></li>
|
46
|
+
<li><a href="#forms__select">Select menus</a></li>
|
47
|
+
<li><a href="#forms__checkbox">Checkboxes</a></li>
|
48
|
+
<li><a href="#forms__radio">Radio buttons</a></li>
|
49
|
+
<li><a href="#forms__textareas">Textareas</a></li>
|
50
|
+
<li><a href="#forms__html5">HTML5 inputs</a></li>
|
51
|
+
<li><a href="#forms__action">Action buttons</a></li>
|
52
|
+
</ul>
|
53
|
+
</li>
|
54
|
+
</ul>
|
55
|
+
</nav>
|
56
|
+
<main role="main">
|
57
|
+
<section id="text">
|
58
|
+
<header><h1>Text</h1></header>
|
59
|
+
<article id="text__headings">
|
60
|
+
<header>
|
61
|
+
<h1>Headings</h1>
|
62
|
+
</header>
|
63
|
+
<div>
|
64
|
+
<h1>Heading 1</h1>
|
65
|
+
<h2>Heading 2</h2>
|
66
|
+
<h3>Heading 3</h3>
|
67
|
+
<h4>Heading 4</h4>
|
68
|
+
<h5>Heading 5</h5>
|
69
|
+
<h6>Heading 6</h6>
|
70
|
+
</div>
|
71
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
72
|
+
</article>
|
73
|
+
<article id="text__paragraphs">
|
74
|
+
<header><h1>Paragraphs</h1></header>
|
75
|
+
<div>
|
76
|
+
<p>A paragraph (from the Greek paragraphos, “to write beside” or “written beside”) is a self-contained unit of a discourse in writing dealing with a particular point or idea. A paragraph consists of one or more sentences. Though not required by the syntax of any language, paragraphs are usually an expected part of formal writing, used to organize longer prose.</p>
|
77
|
+
</div>
|
78
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
79
|
+
</article>
|
80
|
+
<article id="text__blockquotes">
|
81
|
+
<header><h1>Blockquotes</h1></header>
|
82
|
+
<div>
|
83
|
+
<blockquote>
|
84
|
+
<p>A block quotation (also known as a long quotation or extract) is a quotation in a written document, that is set off from the main text as a paragraph, or block of text.</p>
|
85
|
+
<p>It is typically distinguished visually using indentation and a different typeface or smaller size quotation. It may or may not include a citation, usually placed at the bottom.</p>
|
86
|
+
<cite><a href="#!">Said no one, ever.</a></cite>
|
87
|
+
</blockquote>
|
88
|
+
</div>
|
89
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
90
|
+
</article>
|
91
|
+
<article id="text__lists">
|
92
|
+
<header><h1>Lists</h1></header>
|
93
|
+
<div>
|
94
|
+
<h3>Definition list</h3>
|
95
|
+
<dl>
|
96
|
+
<dt>Definition List Title</dt>
|
97
|
+
<dd>This is a definition list division.</dd>
|
98
|
+
</dl>
|
99
|
+
<h3>Ordered List</h3>
|
100
|
+
<ol>
|
101
|
+
<li>List Item 1</li>
|
102
|
+
<li>List Item 2</li>
|
103
|
+
<li>List Item 3</li>
|
104
|
+
</ol>
|
105
|
+
<h3>Unordered List</h3>
|
106
|
+
<ul>
|
107
|
+
<li>List Item 1</li>
|
108
|
+
<li>List Item 2</li>
|
109
|
+
<li>List Item 3</li>
|
110
|
+
</ul>
|
111
|
+
</div>
|
112
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
113
|
+
</article>
|
114
|
+
<article id="text__hr">
|
115
|
+
<header><h1>Horizontal rules</h1></header>
|
116
|
+
<div>
|
117
|
+
<hr>
|
118
|
+
</div>
|
119
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
120
|
+
</article>
|
121
|
+
<article id="text__tables">
|
122
|
+
<header><h1>Tabular data</h1></header>
|
123
|
+
<table>
|
124
|
+
<caption>Table Caption</caption>
|
125
|
+
<thead>
|
126
|
+
<tr>
|
127
|
+
<th>Table Heading 1</th>
|
128
|
+
<th>Table Heading 2</th>
|
129
|
+
<th>Table Heading 3</th>
|
130
|
+
<th>Table Heading 4</th>
|
131
|
+
<th>Table Heading 5</th>
|
132
|
+
</tr>
|
133
|
+
</thead>
|
134
|
+
<tfoot>
|
135
|
+
<tr>
|
136
|
+
<th>Table Footer 1</th>
|
137
|
+
<th>Table Footer 2</th>
|
138
|
+
<th>Table Footer 3</th>
|
139
|
+
<th>Table Footer 4</th>
|
140
|
+
<th>Table Footer 5</th>
|
141
|
+
</tr>
|
142
|
+
</tfoot>
|
143
|
+
<tbody>
|
144
|
+
<tr>
|
145
|
+
<td>Table Cell 1</td>
|
146
|
+
<td>Table Cell 2</td>
|
147
|
+
<td>Table Cell 3</td>
|
148
|
+
<td>Table Cell 4</td>
|
149
|
+
<td>Table Cell 5</td>
|
150
|
+
</tr>
|
151
|
+
<tr>
|
152
|
+
<td>Table Cell 1</td>
|
153
|
+
<td>Table Cell 2</td>
|
154
|
+
<td>Table Cell 3</td>
|
155
|
+
<td>Table Cell 4</td>
|
156
|
+
<td>Table Cell 5</td>
|
157
|
+
</tr>
|
158
|
+
<tr>
|
159
|
+
<td>Table Cell 1</td>
|
160
|
+
<td>Table Cell 2</td>
|
161
|
+
<td>Table Cell 3</td>
|
162
|
+
<td>Table Cell 4</td>
|
163
|
+
<td>Table Cell 5</td>
|
164
|
+
</tr>
|
165
|
+
<tr>
|
166
|
+
<td>Table Cell 1</td>
|
167
|
+
<td>Table Cell 2</td>
|
168
|
+
<td>Table Cell 3</td>
|
169
|
+
<td>Table Cell 4</td>
|
170
|
+
<td>Table Cell 5</td>
|
171
|
+
</tr>
|
172
|
+
</tbody>
|
173
|
+
</table>
|
174
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
175
|
+
</article>
|
176
|
+
<article id="text__code">
|
177
|
+
<header><h1>Code</h1></header>
|
178
|
+
<div>
|
179
|
+
<p><strong>Keyboard input:</strong> <kbd>Cmd</kbd></p>
|
180
|
+
<p><strong>Inline code:</strong> <code><div>code</div></code></p>
|
181
|
+
<p><strong>Sample output:</strong> <samp>This is sample output from a computer program.</samp></p>
|
182
|
+
<h2>Pre-formatted text</h2>
|
183
|
+
<pre>P R E F O R M A T T E D T E X T
|
184
|
+
! " # $ % & ' ( ) * + , - . /
|
185
|
+
0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
186
|
+
@ A B C D E F G H I J K L M N O
|
187
|
+
P Q R S T U V W X Y Z [ \ ] ^ _
|
188
|
+
` a b c d e f g h i j k l m n o
|
189
|
+
p q r s t u v w x y z { | } ~ </pre>
|
190
|
+
</div>
|
191
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
192
|
+
</article>
|
193
|
+
<article id="text__inline">
|
194
|
+
<header><h1>Inline elements</h1></header>
|
195
|
+
<div>
|
196
|
+
<p><a href="#!">This is a text link</a>.</p>
|
197
|
+
<p><strong>Strong is used to indicate strong importance.</strong></p>
|
198
|
+
<p><em>This text has added emphasis.</em></p>
|
199
|
+
<p>The <b>b element</b> is stylistically different text from normal text, without any special importance.</p>
|
200
|
+
<p>The <i>i element</i> is text that is offset from the normal text.</p>
|
201
|
+
<p>The <u>u element</u> is text with an unarticulated, though explicitly rendered, non-textual annotation.</p>
|
202
|
+
<p><del>This text is deleted</del> and <ins>This text is inserted</ins>.</p>
|
203
|
+
<p><s>This text has a strikethrough</s>.</p>
|
204
|
+
<p>Superscript<sup>®</sup>.</p>
|
205
|
+
<p>Subscript for things like H<sub>2</sub>O.</p>
|
206
|
+
<p><small>This small text is small for fine print, etc.</small></p>
|
207
|
+
<p>Abbreviation: <abbr title="HyperText Markup Language">HTML</abbr></p>
|
208
|
+
<p><q cite="https://developer.mozilla.org/en-US/docs/HTML/Element/q">This text is a short inline quotation.</q></p>
|
209
|
+
<p><cite>This is a citation.</cite></p>
|
210
|
+
<p>The <dfn>dfn element</dfn> indicates a definition.</p>
|
211
|
+
<p>The <mark>mark element</mark> indicates a highlight.</p>
|
212
|
+
<p>The <var>variable element</var>, such as <var>x</var> = <var>y</var>.</p>
|
213
|
+
<p>The time element: <time datetime="2013-04-06T12:32+00:00">2 weeks ago</time></p>
|
214
|
+
</div>
|
215
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
216
|
+
</article>
|
217
|
+
</section>
|
218
|
+
|
219
|
+
<section id="embedded">
|
220
|
+
<header><h1>Embedded content</h1></header>
|
221
|
+
<article id="embedded__images">
|
222
|
+
<header><h2>Images</h2></header>
|
223
|
+
<div>
|
224
|
+
<h3>No <code>&lt;figure&gt;</code> element</h3>
|
225
|
+
<p><img src="http://placekitten.com/480/480" alt="Image alt text"></p>
|
226
|
+
<h3>Wrapped in a <code>&lt;figure&gt;</code> element, no <code>&lt;figcaption&gt;</code></h3>
|
227
|
+
<figure><img src="http://placekitten.com/420/420" alt="Image alt text"></figure>
|
228
|
+
<h3>Wrapped in a <code>&lt;figure&gt;</code> element, with a <code>&lt;figcaption&gt;</code></h3>
|
229
|
+
<figure>
|
230
|
+
<img src="http://placekitten.com/420/420" alt="Image alt text">
|
231
|
+
<figcaption>Here is a caption for this image.</figcaption>
|
232
|
+
</figure>
|
233
|
+
</div>
|
234
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
235
|
+
</article>
|
236
|
+
<article id="embedded__audio">
|
237
|
+
<header><h2>Audio</h2></header>
|
238
|
+
<div><audio controls="">audio</audio></div>
|
239
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
240
|
+
</article>
|
241
|
+
<article id="embedded__video">
|
242
|
+
<header><h2>Video</h2></header>
|
243
|
+
<div><video controls="">video</video></div>
|
244
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
245
|
+
</article>
|
246
|
+
<article id="embedded__canvas">
|
247
|
+
<header><h2>Canvas</h2></header>
|
248
|
+
<div><canvas>canvas</canvas></div>
|
249
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
250
|
+
</article>
|
251
|
+
<article id="embedded__meter">
|
252
|
+
<header><h2>Meter</h2></header>
|
253
|
+
<div><meter value="2" min="0" max="10">2 out of 10</meter></div>
|
254
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
255
|
+
</article>
|
256
|
+
<article id="embedded__progress">
|
257
|
+
<header><h2>Progress</h2></header>
|
258
|
+
<div><progress>progress</progress></div>
|
259
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
260
|
+
</article>
|
261
|
+
<article id="embedded__svg">
|
262
|
+
<header><h2>Inline SVG</h2></header>
|
263
|
+
<div><svg width="100px" height="100px"><circle cx="100" cy="100" r="100" fill="#1fa3ec"></circle></svg></div>
|
264
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
265
|
+
</article>
|
266
|
+
<article id="embedded__iframe">
|
267
|
+
<header><h2>IFrame</h2></header>
|
268
|
+
<div><iframe src="index.html" height="300"></iframe></div>
|
269
|
+
<footer><p><a href="#top">[Top]</a></p></footer>
|
270
|
+
</article>
|
271
|
+
</section>
|
272
|
+
<section id="forms">
|
273
|
+
<header><h1>Form elements</h1></header>
|
274
|
+
<form>
|
275
|
+
<fieldset id="forms__input">
|
276
|
+
<legend>Input fields</legend>
|
277
|
+
<p>
|
278
|
+
<label for="input__text">Text Input</label>
|
279
|
+
<input id="input__text" type="text" placeholder="Text Input">
|
280
|
+
</p>
|
281
|
+
<p>
|
282
|
+
<label for="input__password">Password</label>
|
283
|
+
<input id="input__password" type="password" placeholder="Type your Password">
|
284
|
+
</p>
|
285
|
+
<p>
|
286
|
+
<label for="input__webaddress">Web Address</label>
|
287
|
+
<input id="input__webaddress" type="url" placeholder="http://yoursite.com">
|
288
|
+
</p>
|
289
|
+
<p>
|
290
|
+
<label for="input__emailaddress">Email Address</label>
|
291
|
+
<input id="input__emailaddress" type="email" placeholder="name@email.com">
|
292
|
+
</p>
|
293
|
+
<p>
|
294
|
+
<label for="input__phone">Phone Number</label>
|
295
|
+
<input id="input__phone" type="tel" placeholder="(999) 999-9999">
|
296
|
+
</p>
|
297
|
+
<p>
|
298
|
+
<label for="input__search">Search</label>
|
299
|
+
<input id="input__search" type="search" placeholder="Enter Search Term">
|
300
|
+
</p>
|
301
|
+
<p>
|
302
|
+
<label for="input__text2">Number Input</label>
|
303
|
+
<input id="input__text2" type="number" placeholder="Enter a Number">
|
304
|
+
</p>
|
305
|
+
<p>
|
306
|
+
<label for="input__text3" class="error">Error</label>
|
307
|
+
<input id="input__text3" class="is-error" type="text" placeholder="Text Input">
|
308
|
+
</p>
|
309
|
+
<p>
|
310
|
+
<label for="input__text4" class="valid">Valid</label>
|
311
|
+
<input id="input__text4" class="is-valid" type="text" placeholder="Text Input">
|
312
|
+
</p>
|
313
|
+
</fieldset>
|
314
|
+
<p><a href="#top">[Top]</a></p>
|
315
|
+
<fieldset id="forms__select">
|
316
|
+
<legend>Select menus</legend>
|
317
|
+
<p>
|
318
|
+
<label for="select">Select</label>
|
319
|
+
<select id="select">
|
320
|
+
<optgroup label="Option Group">
|
321
|
+
<option>Option One</option>
|
322
|
+
<option>Option Two</option>
|
323
|
+
<option>Option Three</option>
|
324
|
+
</optgroup>
|
325
|
+
</select>
|
326
|
+
</p>
|
327
|
+
</fieldset>
|
328
|
+
<p><a href="#top">[Top]</a></p>
|
329
|
+
<fieldset id="forms__checkbox">
|
330
|
+
<legend>Checkboxes</legend>
|
331
|
+
<ul class="list list--bare">
|
332
|
+
<li><label for="checkbox1"><input id="checkbox1" name="checkbox" type="checkbox" checked="checked"> Choice A</label></li>
|
333
|
+
<li><label for="checkbox2"><input id="checkbox2" name="checkbox" type="checkbox"> Choice B</label></li>
|
334
|
+
<li><label for="checkbox3"><input id="checkbox3" name="checkbox" type="checkbox"> Choice C</label></li>
|
335
|
+
</ul>
|
336
|
+
</fieldset>
|
337
|
+
<p><a href="#top">[Top]</a></p>
|
338
|
+
<fieldset id="forms__radio">
|
339
|
+
<legend>Radio buttons</legend>
|
340
|
+
<ul class="list list--bare">
|
341
|
+
<li><label for="radio1"><input id="radio1" name="radio" type="radio" class="radio" checked="checked"> Option 1</label></li>
|
342
|
+
<li><label for="radio2"><input id="radio2" name="radio" type="radio" class="radio"> Option 2</label></li>
|
343
|
+
<li><label for="radio3"><input id="radio3" name="radio" type="radio" class="radio"> Option 3</label></li>
|
344
|
+
</ul>
|
345
|
+
</fieldset>
|
346
|
+
<p><a href="#top">[Top]</a></p>
|
347
|
+
<fieldset id="forms__textareas">
|
348
|
+
<legend>Textareas</legend>
|
349
|
+
<p>
|
350
|
+
<label for="textarea">Textarea</label>
|
351
|
+
<textarea id="textarea" rows="8" cols="48" placeholder="Enter your message here"></textarea>
|
352
|
+
</p>
|
353
|
+
</fieldset>
|
354
|
+
<p><a href="#top">[Top]</a></p>
|
355
|
+
<fieldset id="forms__html5">
|
356
|
+
<legend>HTML5 inputs</legend>
|
357
|
+
<p>
|
358
|
+
<label for="ic">Color input</label>
|
359
|
+
<input type="color" id="ic" value="#000000">
|
360
|
+
</p>
|
361
|
+
<p>
|
362
|
+
<label for="in">Number input</label>
|
363
|
+
<input type="number" id="in" min="0" max="10" value="5">
|
364
|
+
</p>
|
365
|
+
<p>
|
366
|
+
<label for="ir">Range input</label>
|
367
|
+
<input type="range" id="ir" value="10">
|
368
|
+
</p>
|
369
|
+
<p>
|
370
|
+
<label for="idd">Date input</label>
|
371
|
+
<input type="date" id="idd" value="1970-01-01">
|
372
|
+
</p>
|
373
|
+
<p>
|
374
|
+
<label for="idm">Month input</label>
|
375
|
+
<input type="month" id="idm" value="1970-01">
|
376
|
+
</p>
|
377
|
+
<p>
|
378
|
+
<label for="idw">Week input</label>
|
379
|
+
<input type="week" id="idw" value="1970-W01">
|
380
|
+
</p>
|
381
|
+
<p>
|
382
|
+
<label for="idt">Datetime input</label>
|
383
|
+
<input type="datetime" id="idt" value="1970-01-01T00:00:00Z">
|
384
|
+
</p>
|
385
|
+
<p>
|
386
|
+
<label for="idtl">Datetime-local input</label>
|
387
|
+
<input type="datetime-local" id="idtl" value="1970-01-01T00:00">
|
388
|
+
</p>
|
389
|
+
</fieldset>
|
390
|
+
<p><a href="#top">[Top]</a></p>
|
391
|
+
<fieldset id="forms__action">
|
392
|
+
<legend>Action buttons</legend>
|
393
|
+
<p>
|
394
|
+
<input type="submit" value="<input type=submit>">
|
395
|
+
<input type="button" value="<input type=button>">
|
396
|
+
<input type="reset" value="<input type=reset>">
|
397
|
+
<input type="submit" value="<input disabled>" disabled>
|
398
|
+
</p>
|
399
|
+
<p>
|
400
|
+
<button type="submit">&lt;button type=submit&gt;</button>
|
401
|
+
<button type="button">&lt;button type=button&gt;</button>
|
402
|
+
<button type="reset">&lt;button type=reset&gt;</button>
|
403
|
+
<button type="button" disabled>&lt;button disabled&gt;</button>
|
404
|
+
</p>
|
405
|
+
</fieldset>
|
406
|
+
<p><a href="#top">[Top]</a></p>
|
407
|
+
</form>
|
408
|
+
</section>
|
409
|
+
</main>
|
410
|
+
<footer role="contentinfo">
|
411
|
+
<p>Made by <a href="http://twitter.com/cbracco">@cbracco</a>. Code on <a href="http://github.com/cbracco/html5-test-page">GitHub</a>.</p>
|
412
|
+
</footer>
|
413
|
+
</div>
|
414
|
+
</body>
|
415
|
+
</html>
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Searches a tree of nodes for the ones carrying a given attribute value.
#
# The code relies on every node responding to:
#   attributes - an enumerable of name/value pairs (iterated with
#                `each do |name, value|`); the value stored under :class
#                must respond to `any?` (presumably an array of class
#                names - confirm against the parser that builds the tree)
#   children   - an enumerable of child nodes
#   parent     - the parent node, or nil at the root
class TreeSearcher
  # tree - the root node used by #search_by.
  def initialize(tree)
    @tree = tree
    # Accumulator shared by the recursive walkers below.
    @node_array = []
  end

  # Searches the whole tree (root included) for nodes whose attribute
  # attr_name matches attr_value.
  #
  # Prints "Results found" / "No results found" and returns an Array of
  # matching nodes (empty when nothing matches).
  def search_by(attr_name, attr_value)
    run_search { search @tree, attr_name, attr_value }
  end

  # Same as #search_by, but the walk starts at node (node itself is
  # checked too) instead of at the tree root.
  def search_descendents(node, attr_name, attr_value)
    run_search { search node, attr_name, attr_value }
  end

  # Checks node and then every ancestor reached through `parent`.
  #
  # Prints the same status line as #search_by and returns an Array of
  # matching nodes, ordered from node upwards.
  def search_ancestors(node, attr_name, attr_value)
    run_search { search_ancestor_helper node, attr_name, attr_value }
  end

  # Depth-first, pre-order walk starting at data. Matches are appended to
  # the shared accumulator, which is also the return value.
  #
  # Always returns the accumulator Array. The previous implementation
  # returned nil for a node without children, which made the public
  # search methods raise NoMethodError on `nil.empty?`.
  def search(data, attr_name, attr_value)
    collect_results data, attr_name, attr_value
    data.children.each do |child|
      search child, attr_name, attr_value
    end
    @node_array
  end

  # Walks from data up the parent chain, collecting matches on the way.
  #
  # Always returns the accumulator Array. The previous implementation
  # returned nil when data had no parent, so searching from the root
  # crashed the caller.
  def search_ancestor_helper(data, attr_name, attr_value)
    current = data
    until current.nil?
      collect_results current, attr_name, attr_value
      current = current.parent
    end
    @node_array
  end

  # Appends data to the accumulator when one of its attributes matches.
  #
  # For :class the stored value is treated as a collection and matches when
  # any member equals attr_value; every other attribute must equal
  # attr_value exactly.
  def collect_results(data, attr_name, attr_value)
    data.attributes.each do |name, value|
      if attr_name == :class && name == :class
        @node_array << data if value.any? { |class_name| attr_value == class_name }
      else
        @node_array << data if attr_name == name && attr_value == value
      end
    end
  end

  private

  # Runs one search with a clean accumulator, prints the status line and
  # returns the collected nodes. The accumulator is reset in `ensure` so an
  # exception raised mid-walk cannot leak partial results into the next
  # search.
  def run_search
    @node_array = []
    result = yield
    puts result.empty? ? "No results found" : "Results found"
    result
  ensure
    @node_array = []
  end
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: domparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- BranLiang
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-08-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: This gem has three main functions - read html, search data, rebuild
|
56
|
+
html.
|
57
|
+
email:
|
58
|
+
- lby89757@hotmail.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".rspec"
|
65
|
+
- ".travis.yml"
|
66
|
+
- CODE_OF_CONDUCT.md
|
67
|
+
- Gemfile
|
68
|
+
- Guardfile
|
69
|
+
- LICENSE.txt
|
70
|
+
- README.md
|
71
|
+
- Rakefile
|
72
|
+
- bin/console
|
73
|
+
- bin/setup
|
74
|
+
- domparser.gemspec
|
75
|
+
- img/Screenshot from 2016-08-07 14-49-36.png
|
76
|
+
- img/Screenshot from 2016-08-07 14-58-06.png
|
77
|
+
- lib/domparser.rb
|
78
|
+
- lib/domparser/node_renderer.rb
|
79
|
+
- lib/domparser/parser_script.rb
|
80
|
+
- lib/domparser/test.html
|
81
|
+
- lib/domparser/test2.html
|
82
|
+
- lib/domparser/test3.html
|
83
|
+
- lib/domparser/tree_searcher.rb
|
84
|
+
- lib/domparser/version.rb
|
85
|
+
homepage: http://liangboyuan.pub
|
86
|
+
licenses:
|
87
|
+
- MIT
|
88
|
+
metadata:
|
89
|
+
allowed_push_host: https://rubygems.org
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirements: []
|
105
|
+
rubyforge_project:
|
106
|
+
rubygems_version: 2.5.1
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: Simple dom parser which translates html into a tree data structure.
|
110
|
+
test_files: []
|