gammo 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '009d6d5682151d83fe688e67ba57541bcccf5542b2865d8b85be77fae8156178'
4
+ data.tar.gz: 31a2f1d37e01a3c9e47db2b034965c75b0bc7ddd4d2f86826ae08fd37d199788
5
+ SHA512:
6
+ metadata.gz: c77bcb2f3cc9b25ac7400eff41819289980b1ff9a53481cca51b1444bca24dbdd114ead1c90f6cec219b0c844b0e63775338a7a7f74910b24c0ab6b0a00e2d54
7
+ data.tar.gz: a907e000dd8d4c01bcdb3f834ec17fbc20bdddcc91c38b77add44d3b56116eef8b7a64acdbd967fff65b1076df65871899816493ca012b98947266cc1ba51d8c
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.0
6
+ before_install: gem install bundler -v 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in gammo.gemspec
4
+ gemspec
5
+
6
+ gem 'yard'
7
+ gem 'rake', '~> 12.0'
8
+ gem 'test-unit', '~> 3.3.5'
9
+ gem 'erubi'
@@ -0,0 +1,27 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ gammo (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ erubi (1.9.0)
10
+ power_assert (1.1.5)
11
+ rake (12.3.3)
12
+ test-unit (3.3.5)
13
+ power_assert
14
+ yard (0.9.20)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ erubi
21
+ gammo!
22
+ rake (~> 12.0)
23
+ test-unit (~> 3.3.5)
24
+ yard
25
+
26
+ BUNDLED WITH
27
+ 2.0.2
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 namusyaka
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,177 @@
1
+ # Gammo - A pure-Ruby HTML5 parser
2
+
3
+ [![Build Status](https://travis-ci.org/namusyaka/gammo.svg?branch=master)](https://travis-ci.org/namusyaka/gammo)
4
+
5
+ Gammo is an implementation of the HTML5 parsing algorithm that conforms to [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html), without any dependencies. Given an HTML string, Gammo parses it and builds a DOM tree based on the tokenization and tree-construction algorithms defined in the WHATWG parsing specification.
6
+
7
+ The name Gammo is inspired by [Gumbo](https://github.com/google/gumbo-parser), but Gammo itself is a fried tofu fritter made with vegetables.
8
+
9
+ ```ruby
10
+ require 'gammo'
11
+ require 'open-uri'
12
+
13
+ parser = Gammo.new(open('https://google.com'))
14
+ parser.parse #=> #<Gammo::Node::Document>
15
+ ```
16
+
17
+ ## Overview
18
+
19
+ ### Features
20
+
21
+ - [Tokenization](#tokenization): Gammo has a tokenizer for implementing [the tokenization algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tokenization).
22
+ - [Parsing](#parsing): Gammo provides a parser which implements the parsing algorithm by the above tokenization and [the tree-construction algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction).
23
+ - [Node](#node): Gammo provides the nodes which implement [WHATWG DOM specification](https://dom.spec.whatwg.org/) partially.
24
+ - [Performance](#performance): Gammo does not prioritize performance, and there are a few potential performance notes.
25
+
26
+ ## Tokenization
27
+
28
+ `Gammo::Tokenizer` implements the tokenization algorithm in WHATWG. You can get tokens in order by calling `Gammo::Tokenizer#next_token`.
29
+
30
+ Here is a simple example for performing only the tokenizer.
31
+
32
+ ```ruby
33
+ def dump_for(token)
34
+ puts "data: #{token.data}, class: #{token.class}"
35
+ end
36
+
37
+ tokenizer = Gammo::Tokenizer.new('<!doctype html><input type="button"><frameset>')
38
+ dump_for tokenizer.next_token #=> data: html, class: Gammo::Tokenizer::DoctypeToken
39
+ dump_for tokenizer.next_token #=> data: input, class: Gammo::Tokenizer::StartTagToken
40
+ dump_for tokenizer.next_token #=> data: frameset, class: Gammo::Tokenizer::StartTagToken
41
+ dump_for tokenizer.next_token #=> data: end of string, class: Gammo::Tokenizer::ErrorToken
42
+ ```
43
+
44
+ The parser described below depends on this tokenizer: it applies the WHATWG parsing algorithm, in order, to the tokens that the tokenizer extracts.
45
+
46
+ ### Token types
47
+
48
+ The tokens generated by the tokenizer will be categorized into one of the following types:
49
+
50
+ <table>
51
+ <thead>
52
+ <tr>
53
+ <th>Token type</th>
54
+ <th>Description</th>
55
+ </tr>
56
+ </thead>
57
+ <tbody>
58
+ <tr>
59
+ <td><code>Gammo::Tokenizer::ErrorToken</code></td>
60
+ <td>Represents an error token, it usually means end-of-string.</td>
61
+ </tr>
62
+ <tr>
63
+ <td><code>Gammo::Tokenizer::TextToken</code></td>
64
+ <td>Represents a text token like "foo" which is inner text of elements.</td>
65
+ </tr>
66
+ <tr>
67
+ <td><code>Gammo::Tokenizer::StartTagToken</code></td>
68
+ <td>Represents a start tag token like <code>&lt;a&gt;</code>.</td>
69
+ </tr>
70
+ <tr>
71
+ <td><code>Gammo::Tokenizer::EndTagToken</code></td>
72
+ <td>Represents an end tag token like <code>&lt;/a&gt;</code>.</td>
73
+ </tr>
74
+ <tr>
75
+ <td><code>Gammo::Tokenizer::SelfClosingTagToken</code></td>
76
+ <td>Represents a self closing tag token like <code>&lt;img /&gt;</code></td>
77
+ </tr>
78
+ <tr>
79
+ <td><code>Gammo::Tokenizer::CommentToken</code></td>
80
+ <td>Represents a comment token like <code>&lt;!-- comment --&gt;</code>.</td>
81
+ </tr>
82
+ <tr>
83
+ <td><code>Gammo::Tokenizer::DoctypeToken</code></td>
84
+ <td>Represents a doctype token like <code>&lt;!doctype html&gt;</code>.</td>
85
+ </tr>
86
+ </tbody>
87
+ </table>
88
+
89
+ ## Parsing
90
+
91
+ `Gammo::Parser` implements processing in [the tree-construction stage](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction) based on the tokenization described above.
92
+
93
+ After a successful parse, the parser exposes the root document through the `document` accessor (this is the same as the return value of `Gammo::Parser#parse`). From the `document` accessor, you can traverse the DOM tree constructed by the parser.
94
+
95
+ ```ruby
96
+ require 'gammo'
97
+ require 'pp'
98
+
99
+ document = Gammo.new('<!doctype html><input type="button">').parse
100
+
101
+ def dump_for(node, strm)
102
+ strm << node.to_h
103
+ return unless node && (child = node.first_child)
104
+ while child
105
+ dump_for(child, (strm.last[:children] ||= []))
106
+ child = child.next_sibling
107
+ end
108
+ strm
109
+ end
110
+
111
+ pp dump_for(document, [])
112
+ ```
113
+
114
+ ### Notes
115
+
116
+ Currently, it's not possible to traverse the DOM tree with CSS selectors or XPath as you can with [Nokogiri](https://nokogiri.org/).
117
+ However, Gammo plans to implement these features in the future.
118
+
119
+ ## Node
120
+
121
+ The nodes generated by the parser will be categorized into one of the following types:
122
+
123
+ <table>
124
+ <thead>
125
+ <tr>
126
+ <th>Node type</th>
127
+ <th>Description</th>
128
+ </tr>
129
+ </thead>
130
+ <tbody>
131
+ <tr>
132
+ <td><code>Gammo::Node::Error</code></td>
133
+ <td>Represents error node, it usually means end-of-string.</td>
134
+ </tr>
135
+ <tr>
136
+ <td><code>Gammo::Node::Text</code></td>
137
+ <td>Represents the text node like "foo" which is inner text of elements.</td>
138
+ </tr>
139
+ <tr>
140
+ <td><code>Gammo::Node::Document</code></td>
141
+ <td>Represents the root document type. It's always returned by <code>Gammo::Parser#document</code>.</td>
142
+ </tr>
143
+ <tr>
144
+ <td><code>Gammo::Node::Element</code></td>
145
+ <td>Represents any elements of HTML like <code>&lt;p&gt;</code>.</td>
146
+ </tr>
147
+ <tr>
148
+ <td><code>Gammo::Node::Comment</code></td>
149
+ <td>Represents comments like <code>&lt;!-- foo --&gt;</code></td>
150
+ </tr>
151
+ <tr>
152
+ <td><code>Gammo::Node::Doctype</code></td>
153
+ <td>Represents doctype like <code>&lt;!doctype html&gt;</code></td>
154
+ </tr>
155
+ </tbody>
156
+ </table>
157
+
158
+ Some nodes, such as `Gammo::Node::Element` and `Gammo::Node::Document`, hold pointers to related nodes, which are exposed through accessors such as `Gammo::Node#next_sibling` and `Gammo::Node#first_child`. In addition, APIs such as `Gammo::Node#append_child` and `Gammo::Node#remove_child`, which perform operations defined in the DOM Living Standard, are also provided.
159
+
160
+ ## Performance
161
+
162
+ As mentioned in the features at the beginning, Gammo doesn't prioritize its performance.
163
+ Thus, for example, Gammo is not suitable for very performance-sensitive applications (e.g. performing Gammo parsing synchronously from an incoming request from an end user).
164
+ Instead, the goal is to work well with batch processing such as crawlers.
165
+ Gammo places the highest priority on making it easy to parse HTML by performing it without depending on native extensions or external gems.
166
+
167
+ ## References
168
+
169
+ This was developed with reference to the following software.
170
+
171
+ - [x/net/html](https://godoc.org/golang.org/x/net/html): I've been working on this package, it gave me strong reason to make this happen.
172
+ - [Blink](https://www.chromium.org/blink): Blink gave me great impression about tree construction.
173
+ - [html5lib-tests](https://github.com/html5lib/html5lib-tests): Gammo relies on this test.
174
+
175
+ ## License
176
+
177
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,25 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+ require 'yaml'
4
+ require 'erubi'
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ end
11
+
12
+ task default: :test
13
+
# Converts a lowercase, hyphen-separated name into CamelCase.
#
# The leading run of lowercase letters/digits is capitalized, and every
# "-segment" is replaced by its capitalized form with the hyphen removed.
# All hyphenated segments are converted (not only the first one), so names
# with several hyphens yield a hyphen-free result.
#
# @example
#   camelize('annotation-xml') #=> "AnnotationXml"
#   camelize('font-face-src')  #=> "FontFaceSrc"
#
# @param [String] str
# @return [String]
def camelize(str)
  str
    .sub(/\A[a-z\d]*/, &:capitalize)
    .gsub(/-[a-z]*/) { |segment| segment[1..-1].capitalize }
end
17
+
# Regenerates lib/gammo/tags/table.rb.
#
# Reads misc/html.yaml, concatenates all of its values into one de-duplicated
# list stored in @tags, renders the misc/table.erubi template against this
# binding (so the template can see @tags and the helpers defined in this
# Rakefile), and writes the rendered text to lib/gammo/tags/table.rb.
#
# NOTE(review): YAML.load and eval are used on files that live in this
# repository; do not point this task at untrusted input.
task :generate do
  data = YAML.load(File.read('misc/html.yaml'), symbolize_names: true)
  @tags = data.each_value.inject(:+).uniq
  table = eval(Erubi::Engine.new(File.read('misc/table.erubi')).src, binding)
  File.write('lib/gammo/tags/table.rb', table)
end
@@ -0,0 +1,23 @@
1
+ require_relative 'lib/gammo/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "gammo"
5
+ spec.version = Gammo::VERSION
6
+ spec.authors = ["namusyaka"]
7
+ spec.email = ["namusyaka@gmail.com"]
8
+
9
+ spec.summary = %q{An HTML parser which implements WHATWG parsing algorithm.}
10
+ spec.description = %q{Gammo is an implementation of the HTML5 parsing algorithm which conforms the WHATWG specification with pure Ruby.}
11
+ spec.homepage = "https://github.com/namusyaka/gammo"
12
+ spec.license = "MIT"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
+
15
+ spec.metadata["homepage_uri"] = spec.homepage
16
+ spec.metadata["source_code_uri"] = "https://github.com/namusyaka/gammo"
17
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
18
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
19
+ end
20
+ spec.bindir = "exe"
21
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
+ spec.require_paths = ["lib"]
23
+ end
@@ -0,0 +1,15 @@
1
+ require "gammo/version"
2
+ require "gammo/parser"
3
+ require "gammo/fragment_parser"
4
+
module Gammo
  # Builds a parser for the given input.
  #
  # With `fragment: false` (the default) a {Gammo::Parser} is created;
  # with a truthy `fragment` a {Gammo::FragmentParser} is created instead.
  # All remaining keyword options are forwarded untouched to the chosen
  # parser's constructor (the fragment parser requires a `context:` option).
  #
  # @param [String] input
  # @param [Boolean] fragment whether to use the fragment parser
  # @param [Hash] options keyword options forwarded to the parser
  # @return [Gammo::Parser]
  def self.new(input, fragment: false, **options)
    parser_class = fragment ? FragmentParser : Parser
    parser_class.new(input, **options)
  end
end
@@ -0,0 +1,17 @@
module Gammo
  # A single attribute: a key/value pair with an optional namespace.
  class Attribute
    # Name of the attribute.
    attr_accessor :key

    # Value of the attribute.
    attr_accessor :value

    # Namespace of the attribute; nil unless one was given.
    attr_accessor :namespace

    # Builds an attribute from the given key-value pair.
    # @param [String] key
    # @param [String] value
    # @param [String, nil] namespace
    # @return [Attribute]
    def initialize(key:, value:, namespace: nil)
      @key, @value, @namespace = key, value, namespace
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'gammo/parser'
2
+
module Gammo
  # Parser that handles a fragment of HTML relative to a context element and
  # returns the parsed nodes instead of a whole document.
  class FragmentParser < ::Gammo::Parser
    # Sets up a parser for the HTML fragment parsing algorithm.
    # @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
    # @param [String] input
    # @param [Gammo::Node::Element] context element the fragment is parsed against
    # @param [Hash] options remaining keyword options, forwarded to the superclass
    # @raise [Gammo::ParseError] raises if context is not valid.
    # @return [Gammo::FragmentParser]
    def initialize(input, context:, **options)
      validate_context(context)
      super(input, context: context, **options)

      # Parsing happens under a synthetic <html> root attached to the document.
      @root = Node::Element.new(tag: Tags::Html, data: Tags::Html.to_s)
      # The tokenizer gets the context tag name only when the context has no
      # namespace; otherwise it receives false.
      tokenizer_context = context.namespace ? false : context.tag.to_s
      @tokenizer = Tokenizer.new(input, context: tokenizer_context)
      @open_elements = NodeStack.new([@root])
      document.append_child(@root)
      template_stack << InTemplate if context.tag == Tags::Template
      reset_insertion_mode

      # Remember the closest <form> element, starting at the context itself
      # and walking up through its ancestors.
      ancestor = context
      while ancestor && !(ancestor.instance_of?(Node::Element) && ancestor.tag == Tags::Form)
        ancestor = ancestor.parent
      end
      @form = ancestor if ancestor
    end

    # Parses the fragment, then detaches every top-level parsed node from its
    # temporary parent and returns them in document order.
    # @raise [Gammo::ParseError] Raised if the parser gets error while parsing.
    # @return [Array<Gammo::Node>]
    def parse
      super
      parent = context ? @root : document
      collection = []
      child = parent.first_child
      while child
        # Grab the sibling first: remove_child clears the child's pointers.
        following = child.next_sibling
        collection << parent.remove_child(child)
        child = following
      end
      collection
    end

    # Always returns true.
    # @return [TrueClass]
    # @!visibility private
    def fragment?
      true
    end

    # Validates given context. Raises {Gammo::ParseError} unless the context is
    # exactly a {Gammo::Node::Element} whose tag agrees with the tag looked up
    # from its data.
    # @param [Gammo::Node::Element] context
    # @raise [Gammo::ParseError]
    def validate_context(context)
      unless context.instance_of?(Node::Element)
        raise ParseError, 'given non-element node in "context"'
      end

      return if context.tag == Tags.lookup(context.data)

      raise ParseError, "inconsistent context node, tag = #{context.tag}, data = #{Tags.lookup(context.data)}"
    end
  end
end
@@ -0,0 +1,157 @@
module Gammo
  # A node of the tree built by the parser. Each node carries its own payload
  # (tag, data, namespace, attributes) together with plain pointers to its
  # parent, its first and last children, and its previous and next siblings.
  class Node
    # Represents an error node (usually end-of-string).
    class Error < Node; end

    # Represents a text node.
    class Text < Node; end

    # Represents the root document node.
    class Document < Node; end

    # Represents an element node.
    class Element < Node; end

    # Represents a comment node like "<!-- foo -->".
    class Comment < Node; end

    # Represents a document type node.
    class Doctype < Node; end

    # Represents the marker defined in 12.2.4.3.
    # https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
    class ScopeMarker < Node; end

    # Default scope marker is inserted when entering applet,
    # object, marquee, template, td, th, and caption elements, and is used
    # to prevent formatting from "leaking" into applet, object, marquee,
    # template, td, th, and caption elements.
    DEFAULT_SCOPE_MARKER = ScopeMarker.new

    # Raised by {#remove_child} when the given node is not a child of the receiver.
    # @!visibility private
    class UncaughtTypeError < ArgumentError; end

    # Raised when a node that is already linked into a tree is inserted again.
    # @!visibility private
    class HierarchyRequestError < ArgumentError; end

    # Pointer to the parent node.
    attr_accessor :parent

    # Pointers to the first and the last child nodes.
    attr_accessor :first_child, :last_child

    # Pointers to the previous and the next sibling nodes.
    attr_accessor :previous_sibling, :next_sibling

    # Payload describing the node itself.
    attr_accessor :tag, :data, :namespace, :attributes

    # Builds a detached node (no parent, children or siblings).
    # @param [Object, nil] tag tag identifier of the node
    # @param [String, nil] data
    # @param [String, nil] namespace
    # @param [Array] attributes attribute list (presumably Gammo::Attribute
    #   entries; confirm against the tokenizer)
    # @return [Gammo::Node]
    def initialize(tag: nil, data: nil, namespace: nil, attributes: [])
      @tag, @data = tag, data
      @namespace = namespace
      @attributes = attributes
    end

    # Links `node` in as a child of the receiver, immediately before `ref`.
    # When `ref` is nil, `node` becomes the last child.
    # @param [Gammo::Node] node
    # @param [Gammo::Node, nil] ref
    # @raise [HierarchyRequestError] Raised if `node` already has a parent or a sibling.
    # @return [Gammo::Node] The inserted node.
    def insert_before(node, ref)
      if attached?(node)
        raise HierarchyRequestError, 'insert_before called for an attached child node'
      end

      if ref
        before = ref.previous_sibling
        after = ref
      else
        before = last_child
        after = nil
      end

      # Without a predecessor the node becomes the new head of the child list.
      if before
        before.next_sibling = node
      else
        @first_child = node
      end

      # Without a successor the node becomes the new tail of the child list.
      if after
        after.previous_sibling = node
      else
        @last_child = node
      end

      node.parent = self
      node.previous_sibling = before
      node.next_sibling = after
      node
    end

    # Links `child` in as the last child of the receiver.
    # @param [Gammo::Node] child
    # @raise [HierarchyRequestError] Raised if `child` already has a parent or a sibling.
    # @return [Gammo::Node] The appended node.
    def append_child(child)
      if attached?(child)
        raise HierarchyRequestError, 'append_child called for an attached child node'
      end

      tail = last_child
      if tail
        tail.next_sibling = child
      else
        @first_child = child
      end
      @last_child = child
      child.parent = self
      child.previous_sibling = tail
      child
    end

    # Unlinks `child` from the receiver and clears its parent and sibling pointers.
    # @param [Gammo::Node] child
    # @raise [UncaughtTypeError] Raised unless `child` is a child of the receiver.
    # @return [Gammo::Node] The removed node.
    def remove_child(child)
      unless child?(child)
        raise UncaughtTypeError, 'remove_child called for a non-child node'
      end

      before = child.previous_sibling
      after = child.next_sibling
      @first_child = after if first_child == child
      after.previous_sibling = before if after
      @last_child = before if last_child == child
      before.next_sibling = after if before
      child.parent = child.previous_sibling = child.next_sibling = nil
      child
    end

    # Builds a detached node of the same class with the same tag and data and
    # a shallow copy of the attribute list. The namespace and the tree
    # pointers are not carried over.
    # @return [Gammo::Node]
    # @!visibility private
    def clone
      self.class.new(tag: tag, data: data, attributes: attributes.dup)
    end

    # Summarises the node as a Hash of its tag, data, attributes and class.
    # @return [Hash]
    # @!visibility private
    def to_h
      { tag: tag, data: data, attributes: attributes, type: self.class }
    end

    private

    # Truthy when the node has a parent or any sibling.
    # @!visibility private
    def attached?(node)
      node.parent || node.previous_sibling || node.next_sibling
    end

    # True when the receiver is the parent of the node.
    # @!visibility private
    def child?(node)
      node.parent == self
    end
  end
end