gammo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +9 -0
  5. data/Gemfile.lock +27 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +177 -0
  8. data/Rakefile +25 -0
  9. data/gammo.gemspec +23 -0
  10. data/lib/gammo.rb +15 -0
  11. data/lib/gammo/attribute.rb +17 -0
  12. data/lib/gammo/fragment_parser.rb +65 -0
  13. data/lib/gammo/node.rb +157 -0
  14. data/lib/gammo/parser.rb +524 -0
  15. data/lib/gammo/parser/constants.rb +94 -0
  16. data/lib/gammo/parser/foreign.rb +307 -0
  17. data/lib/gammo/parser/insertion_mode.rb +74 -0
  18. data/lib/gammo/parser/insertion_mode/after_after_body.rb +36 -0
  19. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +32 -0
  20. data/lib/gammo/parser/insertion_mode/after_body.rb +46 -0
  21. data/lib/gammo/parser/insertion_mode/after_frameset.rb +39 -0
  22. data/lib/gammo/parser/insertion_mode/after_head.rb +70 -0
  23. data/lib/gammo/parser/insertion_mode/before_head.rb +49 -0
  24. data/lib/gammo/parser/insertion_mode/before_html.rb +45 -0
  25. data/lib/gammo/parser/insertion_mode/in_body.rb +463 -0
  26. data/lib/gammo/parser/insertion_mode/in_caption.rb +47 -0
  27. data/lib/gammo/parser/insertion_mode/in_cell.rb +46 -0
  28. data/lib/gammo/parser/insertion_mode/in_column_group.rb +66 -0
  29. data/lib/gammo/parser/insertion_mode/in_frameset.rb +48 -0
  30. data/lib/gammo/parser/insertion_mode/in_head.rb +98 -0
  31. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +52 -0
  32. data/lib/gammo/parser/insertion_mode/in_row.rb +53 -0
  33. data/lib/gammo/parser/insertion_mode/in_select.rb +77 -0
  34. data/lib/gammo/parser/insertion_mode/in_select_in_table.rb +46 -0
  35. data/lib/gammo/parser/insertion_mode/in_table.rb +114 -0
  36. data/lib/gammo/parser/insertion_mode/in_table_body.rb +55 -0
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +80 -0
  38. data/lib/gammo/parser/insertion_mode/initial.rb +152 -0
  39. data/lib/gammo/parser/insertion_mode/text.rb +32 -0
  40. data/lib/gammo/parser/insertion_mode_stack.rb +8 -0
  41. data/lib/gammo/parser/node_stack.rb +24 -0
  42. data/lib/gammo/tags.rb +9 -0
  43. data/lib/gammo/tags/table.rb +744 -0
  44. data/lib/gammo/tokenizer.rb +373 -0
  45. data/lib/gammo/tokenizer/debug.rb +34 -0
  46. data/lib/gammo/tokenizer/entity.rb +2240 -0
  47. data/lib/gammo/tokenizer/escape.rb +174 -0
  48. data/lib/gammo/tokenizer/script_scanner.rb +229 -0
  49. data/lib/gammo/tokenizer/tokens.rb +66 -0
  50. data/lib/gammo/version.rb +3 -0
  51. data/misc/html.yaml +384 -0
  52. data/misc/table.erubi +14 -0
  53. metadata +97 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '009d6d5682151d83fe688e67ba57541bcccf5542b2865d8b85be77fae8156178'
4
+ data.tar.gz: 31a2f1d37e01a3c9e47db2b034965c75b0bc7ddd4d2f86826ae08fd37d199788
5
+ SHA512:
6
+ metadata.gz: c77bcb2f3cc9b25ac7400eff41819289980b1ff9a53481cca51b1444bca24dbdd114ead1c90f6cec219b0c844b0e63775338a7a7f74910b24c0ab6b0a00e2d54
7
+ data.tar.gz: a907e000dd8d4c01bcdb3f834ec17fbc20bdddcc91c38b77add44d3b56116eef8b7a64acdbd967fff65b1076df65871899816493ca012b98947266cc1ba51d8c
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.0
6
+ before_install: gem install bundler -v 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in gammo.gemspec
4
+ gemspec
5
+
6
+ gem 'yard'
7
+ gem 'rake', '~> 12.0'
8
+ gem 'test-unit', '~> 3.3.5'
9
+ gem 'erubi'
@@ -0,0 +1,27 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ gammo (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ erubi (1.9.0)
10
+ power_assert (1.1.5)
11
+ rake (12.3.3)
12
+ test-unit (3.3.5)
13
+ power_assert
14
+ yard (0.9.20)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ erubi
21
+ gammo!
22
+ rake (~> 12.0)
23
+ test-unit (~> 3.3.5)
24
+ yard
25
+
26
+ BUNDLED WITH
27
+ 2.0.2
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 namusyaka
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,177 @@
1
+ # Gammo - A pure-Ruby HTML5 parser
2
+
3
+ [![Build Status](https://travis-ci.org/namusyaka/gammo.svg?branch=master)](https://travis-ci.org/namusyaka/gammo)
4
+
5
+ Gammo is an implementation of the HTML5 parsing algorithm which conforms to [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html), without any dependencies. Given an HTML string, Gammo parses it and builds a DOM tree based on the tokenization and tree-construction algorithms defined in the WHATWG parsing algorithm.
6
+
7
+ Gammo's name is inspired by [Gumbo](https://github.com/google/gumbo-parser), but Gammo itself is a fried tofu fritter made with vegetables.
8
+
9
+ ```ruby
10
+ require 'gammo'
11
+ require 'open-uri'
12
+
13
+ parser = Gammo.new(open('https://google.com'))
14
+ parser.parse #=> #<Gammo::Node::Document>
15
+ ```
16
+
17
+ ## Overview
18
+
19
+ ### Features
20
+
21
+ - [Tokenization](#tokenization): Gammo has a tokenizer for implementing [the tokenization algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tokenization).
22
+ - [Parsing](#parsing): Gammo provides a parser which implements the parsing algorithm by the above tokenization and [the tree-construction algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction).
23
+ - [Node](#node): Gammo provides the nodes which implement [WHATWG DOM specification](https://dom.spec.whatwg.org/) partially.
24
+ - [Performance](#performance): Gammo does not prioritize performance, and there are a few potential performance notes.
25
+
26
+ ## Tokenization
27
+
28
+ `Gammo::Tokenizer` implements the tokenization algorithm in WHATWG. You can get tokens in order by calling `Gammo::Tokenizer#next_token`.
29
+
30
+ Here is a simple example for performing only the tokenizer.
31
+
32
+ ```ruby
33
+ def dump_for(token)
34
+ puts "data: #{token.data}, class: #{token.class}"
35
+ end
36
+
37
+ tokenizer = Gammo::Tokenizer.new('<!doctype html><input type="button"><frameset>')
38
+ dump_for tokenizer.next_token #=> data: html, class: Gammo::Tokenizer::DoctypeToken
39
+ dump_for tokenizer.next_token #=> data: input, class: Gammo::Tokenizer::StartTagToken
40
+ dump_for tokenizer.next_token #=> data: frameset, class: Gammo::Tokenizer::StartTagToken
41
+ dump_for tokenizer.next_token #=> data: end of string, class: Gammo::Tokenizer::ErrorToken
42
+ ```
43
+
44
+ The parser described below depends on this tokenizer; it applies the WHATWG parsing algorithm, in order, to the tokens extracted by this tokenization.
45
+
46
+ ### Token types
47
+
48
+ The tokens generated by the tokenizer will be categorized into one of the following types:
49
+
50
+ <table>
51
+ <thead>
52
+ <tr>
53
+ <th>Token type</th>
54
+ <th>Description</th>
55
+ </tr>
56
+ </thead>
57
+ <tbody>
58
+ <tr>
59
+ <td><code>Gammo::Tokenizer::ErrorToken</code></td>
60
+ <td>Represents an error token, it usually means end-of-string.</td>
61
+ </tr>
62
+ <tr>
63
+ <td><code>Gammo::Tokenizer::TextToken</code></td>
64
+ <td>Represents a text token like "foo" which is inner text of elements.</td>
65
+ </tr>
66
+ <tr>
67
+ <td><code>Gammo::Tokenizer::StartTagToken</code></td>
68
+ <td>Represents a start tag token like <code>&lt;a&gt;</code>.</td>
69
+ </tr>
70
+ <tr>
71
+ <td><code>Gammo::Tokenizer::EndTagToken</code></td>
72
+ <td>Represents an end tag token like <code>&lt;/a&gt;</code>.</td>
73
+ </tr>
74
+ <tr>
75
+ <td><code>Gammo::Tokenizer::SelfClosingTagToken</code></td>
76
+ <td>Represents a self closing tag token like <code>&lt;img /&gt;</code></td>
77
+ </tr>
78
+ <tr>
79
+ <td><code>Gammo::Tokenizer::CommentToken</code></td>
80
+ <td>Represents a comment token like <code>&lt;!-- comment --&gt;</code>.</td>
81
+ </tr>
82
+ <tr>
83
+ <td><code>Gammo::Tokenizer::DoctypeToken</code></td>
84
+ <td>Represents a doctype token like <code>&lt;!doctype html&gt;</code>.</td>
85
+ </tr>
86
+ </tbody>
87
+ </table>
88
+
89
+ ## Parsing
90
+
91
+ `Gammo::Parser` implements processing in [the tree-construction stage](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction) based on the tokenization described above.
92
+
93
+ After a successful parse, the parser exposes the root document through the `document` accessor (this is the same as the return value of `Gammo::Parser#parse`). From the `document` accessor, you can traverse the DOM tree constructed by the parser.
94
+
95
+ ```ruby
96
+ require 'gammo'
97
+ require 'pp'
98
+
99
+ document = Gammo.new('<!doctype html><input type="button">').parse
100
+
101
+ def dump_for(node, strm)
102
+ strm << node.to_h
103
+ return unless node && (child = node.first_child)
104
+ while child
105
+ dump_for(child, (strm.last[:children] ||= []))
106
+ child = child.next_sibling
107
+ end
108
+ strm
109
+ end
110
+
111
+ pp dump_for(document, [])
112
+ ```
113
+
114
+ ### Notes
115
+
116
+ Currently, it's not possible to traverse the DOM tree with CSS selectors or XPath like [Nokogiri](https://nokogiri.org/).
117
+ However, Gammo plans to implement these features in the future.
118
+
119
+ ## Node
120
+
121
+ The nodes generated by the parser will be categorized into one of the following types:
122
+
123
+ <table>
124
+ <thead>
125
+ <tr>
126
+ <th>Node type</th>
127
+ <th>Description</th>
128
+ </tr>
129
+ </thead>
130
+ <tbody>
131
+ <tr>
132
+ <td><code>Gammo::Node::Error</code></td>
133
+ <td>Represents error node, it usually means end-of-string.</td>
134
+ </tr>
135
+ <tr>
136
+ <td><code>Gammo::Node::Text</code></td>
137
+ <td>Represents the text node like "foo" which is inner text of elements.</td>
138
+ </tr>
139
+ <tr>
140
+ <td><code>Gammo::Node::Document</code></td>
141
+ <td>Represents the root document type. It's always returned by <code>Gammo::Parser#document</code>.</td>
142
+ </tr>
143
+ <tr>
144
+ <td><code>Gammo::Node::Element</code></td>
145
+ <td>Represents any elements of HTML like <code>&lt;p&gt;</code>.</td>
146
+ </tr>
147
+ <tr>
148
+ <td><code>Gammo::Node::Comment</code></td>
149
+ <td>Represents comments like <code>&lt;!-- foo --&gt;</code></td>
150
+ </tr>
151
+ <tr>
152
+ <td><code>Gammo::Node::Doctype</code></td>
153
+ <td>Represents doctype like <code>&lt;!doctype html&gt;</code></td>
154
+ </tr>
155
+ </tbody>
156
+ </table>
157
+
158
+ Some nodes, such as `Gammo::Node::Element` and `Gammo::Node::Document`, contain pointers to related nodes, such as `Gammo::Node#next_sibling` or `Gammo::Node#first_child`. In addition, APIs such as `Gammo::Node#append_child` and `Gammo::Node#remove_child`, which perform operations defined in the DOM living standard, are also provided.
159
+
160
+ ## Performance
161
+
162
+ As mentioned in the features at the beginning, Gammo doesn't prioritize its performance.
163
+ Thus, for example, Gammo is not suitable for very performance-sensitive applications (e.g. performing Gammo parsing synchronously from an incoming request from an end user).
164
+ Instead, the goal is to work well with batch processing such as crawlers.
165
+ Gammo places the highest priority on making it easy to parse HTML by performing it without depending on native extensions or external gems.
166
+
167
+ ## References
168
+
169
+ This was developed with reference to the following software.
170
+
171
+ - [x/net/html](https://godoc.org/golang.org/x/net/html): I've been working on this package, it gave me strong reason to make this happen.
172
+ - [Blink](https://www.chromium.org/blink): Blink gave me great impression about tree construction.
173
+ - [html5lib-tests](https://github.com/html5lib/html5lib-tests): Gammo relies on this test.
174
+
175
+ ## License
176
+
177
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,25 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+ require 'yaml'
4
+ require 'erubi'
5
+
6
# Defines the `test` task: runs every test/**/*_test.rb file with both the
# test/ and lib/ directories added to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# Running `rake` without arguments runs the test task.
task default: :test
13
+
14
# Converts a lower-case, possibly hyphenated name into a CamelCase name.
#
# The leading run of lower-case letters and digits is capitalized, and every
# hyphen-led segment is capitalized with its hyphen removed, so that names
# with several hyphens (e.g. "font-face-src") are converted completely.
#
# @param [String] str name to convert, e.g. "annotation-xml"
# @return [String] the camelized name, e.g. "AnnotationXml"
def camelize(str)
  str
    .sub(/\A[a-z\d]*/, &:capitalize)
    .gsub(/-[a-z]*/) { |segment| segment[1..-1].capitalize }
end
17
+
18
# Regenerates lib/gammo/tags/table.rb by rendering the misc/table.erubi
# template with the tag names listed in misc/html.yaml.
#
# The `default` task is already declared earlier in this Rakefile, so it is
# not declared a second time here.
task :generate do
  data = YAML.load(File.read('misc/html.yaml'), symbolize_names: true)
  # Concatenate the lists stored under every top-level key and drop
  # duplicates. @tags is an instance variable so that it is reachable through
  # the binding handed to eval below (presumably read by the template, which
  # is not visible from this file).
  @tags = data.each_value.inject(:+).uniq
  # Erubi compiles the template to Ruby source; evaluating that source yields
  # the rendered text. NOTE(review): eval is acceptable only because the
  # template is a checked-in project file, never external input.
  table = eval(Erubi::Engine.new(File.read('misc/table.erubi')).src, binding)
  File.write('lib/gammo/tags/table.rb', table)
end
@@ -0,0 +1,23 @@
1
+ require_relative 'lib/gammo/version'
2
+
3
# Gem specification for gammo.
Gem::Specification.new do |spec|
  repository_url = "https://github.com/namusyaka/gammo"

  # Identity and authorship.
  spec.name    = "gammo"
  spec.version = Gammo::VERSION
  spec.authors = ["namusyaka"]
  spec.email   = ["namusyaka@gmail.com"]

  # Descriptive metadata.
  spec.summary     = %q{An HTML parser which implements WHATWG parsing algorithm.}
  spec.description = %q{Gammo is an implementation of the HTML5 parsing algorithm which conforms the WHATWG specification with pure Ruby.}
  spec.homepage    = repository_url
  spec.license     = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"]    = spec.homepage
  spec.metadata["source_code_uri"] = repository_url

  # Package every git-tracked file except those under test/, spec/ or
  # features/. The listing is taken from the directory holding this gemspec.
  gemspec_dir   = File.expand_path('..', __FILE__)
  tracked_files = Dir.chdir(gemspec_dir) { `git ls-files -z`.split("\x0") }
  spec.files    = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the tracked files under exe/, referenced by base name.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,15 @@
1
+ require "gammo/version"
2
+ require "gammo/parser"
3
+ require "gammo/fragment_parser"
4
+
5
module Gammo
  # Builds a parser for the given input.
  #
  # A {Gammo::FragmentParser} is created when `fragment` is truthy, and a
  # {Gammo::Parser} otherwise. All remaining keyword options are forwarded
  # unchanged to the constructor of the chosen parser class.
  #
  # @param [String] input
  # @param [TrueClass, FalseClass] fragment
  # @param [Hash] options
  # @return [Gammo::Parser]
  def self.new(input, fragment: false, **options)
    parser_class = fragment ? FragmentParser : Parser
    parser_class.new(input, **options)
  end
end
@@ -0,0 +1,17 @@
1
module Gammo
  # A single attribute: a key-value pair with an optional namespace.
  class Attribute
    # Attribute name.
    attr_accessor :key
    # Attribute value.
    attr_accessor :value
    # Attribute namespace; nil when none was given.
    attr_accessor :namespace

    # Builds an attribute from the given key-value pair.
    # @param [String] key
    # @param [String] value
    # @param [String, NilClass] namespace
    # @return [Attribute]
    def initialize(key:, value:, namespace: nil)
      @key, @value, @namespace = key, value, namespace
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'gammo/parser'
2
+
3
module Gammo
  # Parser variant that parses a fragment of HTML relative to a context
  # element and returns the parsed nodes.
  class FragmentParser < ::Gammo::Parser
    # Builds a parser instance for the fragment parsing algorithm.
    # @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
    # @param [String] input
    # @param [Gammo::Node] context
    # @raise [Gammo::ParseError] raises if context is not valid.
    # @return Gammo::FragmentParser
    def initialize(input, context:, **options)
      validate_context(context)
      super(input, context: context, **options)

      # Replace the state prepared by the superclass with a synthetic <html>
      # root that becomes the only open element and the document's child.
      @root = Node::Element.new(tag: Tags::Html, data: Tags::Html.to_s)
      # The tokenizer receives the context tag name only when the context
      # element has no namespace; otherwise it receives false.
      tokenizer_context = !context.namespace && context.tag.to_s
      @tokenizer = Tokenizer.new(input, context: tokenizer_context)
      @open_elements = NodeStack.new([@root])
      document.append_child(@root)
      template_stack << InTemplate if context.tag == Tags::Template
      reset_insertion_mode

      # Walk from the context element up through its ancestors and remember
      # the nearest <form> element, if there is one.
      ancestor = context
      while ancestor
        is_form = ancestor.instance_of?(Node::Element) && ancestor.tag == Tags::Form
        if is_form
          @form = ancestor
          break
        end
        ancestor = ancestor.parent
      end
    end

    # Parses the fragment and returns the resulting top-level nodes, each
    # detached from the internal container it was built under.
    # @raise [Gammo::ParseError] Raised if the parser gets error while parsing.
    # @return [Array<Gammo::Node>]
    def parse
      super
      container = context ? @root : document
      nodes = []
      current = container.first_child
      while current
        # Remember the successor first: removal clears the sibling links.
        following = current.next_sibling
        container.remove_child(current)
        nodes << current
        current = following
      end
      nodes
    end

    # Always returns true.
    # @return [TrueClass]
    # @!visibility private
    def fragment?
      true
    end

    # Checks the given context. Raises {Gammo::ParseError} unless it is a
    # {Gammo::Node::Element} whose tag agrees with the tag looked up from
    # its data.
    # @param [Gammo::Node] context
    # @raise [Gammo::ParseError]
    def validate_context(context)
      unless context.instance_of?(Node::Element)
        fail ParseError, 'given non-element node in "context"'
      end
      return if context.tag == Tags.lookup(context.data)

      fail ParseError, "inconsistent context node, tag = #{context.tag}, data = #{Tags.lookup(context.data)}"
    end
  end
end
@@ -0,0 +1,157 @@
1
+ module Gammo
2
+ # Class for representing Node.
3
+ # https://html.spec.whatwg.org/multipage/parsing.html#tokenization
4
+ class Node
5
+ # Represents the error token.
6
+ Error = Class.new(Node)
7
+
8
+ # Represents the text token.
9
+ Text = Class.new(Node)
10
+
11
+ # Represents the root document token.
12
+ Document = Class.new(Node)
13
+
14
+ # Represents the element token including start, end and self-closing token.
15
+ Element = Class.new(Node)
16
+
17
+ # Represents the comment token like "<!-- foo -->".
18
+ Comment = Class.new(Node)
19
+
20
+ # Represents the document type token.
21
+ Doctype = Class.new(Node)
22
+
23
+ # Represents the marker defined in 12.2.4.3.
24
+ # https://html.spec.whatwg.org/multipage/parsing.html#tokenization
25
+ ScopeMarker = Class.new(Node)
26
+
27
+ # Default scope marker is inserted when entering applet,
28
+ # object, marquee, template, td, th, and caption elements, and are used
29
+ # to prevent formatting from "leaking" into applet, object, marquee,
30
+ # template, td, th, and caption elements"
31
+ DEFAULT_SCOPE_MARKER = Node::ScopeMarker.new
32
+
33
+ # Raised if uncaught node is given for particular operations.
34
+ # @!visibility private
35
+ UncaughtTypeError = Class.new(ArgumentError)
36
+
37
+ # Raised if anything goes wrong on hierarchy while node operations.
38
+ # @!visibility private
39
+ HierarchyRequestError = Class.new(ArgumentError)
40
+
41
+ # `parent` is the pointer for the parent node.
42
+ attr_accessor :parent
43
+
44
+ # `first_child` and `last_child` are pointers for the first and the last nodes.
45
+ attr_accessor :first_child, :last_child
46
+
47
+ # `previous_sibling` and `next_sibling` are pointers for the previous and next sibling nodes.
48
+ attr_accessor :previous_sibling, :next_sibling
49
+
50
+ # Properties required to represent node.
51
+ attr_accessor :tag, :data, :namespace, :attributes
52
+
53
+ # Constructs a node which represents HTML element node.
54
+ # @param [String] tag
55
+ # @param [String] data
56
+ # @param [String, NilClass] namespace
57
+ # @param [Hash(String => String)] attributes
58
+ # @return [Gammo::Node]
59
+ def initialize(tag: nil, data: nil, namespace: nil, attributes: [])
60
+ @tag = tag
61
+ @data = data
62
+ @namespace = namespace
63
+ @attributes = attributes
64
+ end
65
+
66
+ # Inserts a node before a reference node as a child of a specified parent node.
67
+ # @param [Gammo::Node] node
68
+ # @param [Gammo::Node] ref
69
+ # @raise [HierarchyRequestError] Raised if given node is already attached to the self node.
70
+ # @return [Gammo::Node] A node inserted before the reference node.
71
+ def insert_before(node, ref)
72
+ raise HierarchyRequestError,
73
+ 'insert_before called for an attached child node' if attached?(node)
74
+ if ref
75
+ previous_sibling, next_sibling = ref.previous_sibling, ref
76
+ else
77
+ previous_sibling = last_child
78
+ end
79
+ if previous_sibling
80
+ previous_sibling.next_sibling = node
81
+ else
82
+ @first_child = node
83
+ end
84
+ if next_sibling
85
+ next_sibling.previous_sibling = node
86
+ else
87
+ @last_child = node
88
+ end
89
+ node.parent = self
90
+ node.previous_sibling = previous_sibling
91
+ node.next_sibling = next_sibling
92
+ node
93
+ end
94
+
95
+ # Appends given `child` into self node.
96
+ # @param [Gammo::Node] child
97
+ # @raise [HierarchyRequestError] Raised if given node is already attached to the self node.
98
+ # @return [Gammo::Node] A node appended into the self node.
99
+ def append_child(child)
100
+ raise HierarchyRequestError,
101
+ 'append_child called for an attached child node' if attached?(child)
102
+ if last = last_child
103
+ last.next_sibling = child
104
+ else
105
+ @first_child = child
106
+ end
107
+ @last_child = child
108
+ child.parent = self
109
+ child.previous_sibling = last
110
+ child
111
+ end
112
+
113
+ # Removes given `child` from self node.
114
+ # @param [Gammo::Node] child
115
+ # @raise [UncaughtTypeError] Raised unless given node is not child of the self node.
116
+ # @return [Gammo::Node] A node removed from the self node.
117
+ def remove_child(child)
118
+ raise UncaughtTypeError,
119
+ 'remove_child called for a non-child node' unless child?(child)
120
+ @first_child = child.next_sibling if first_child == child
121
+ child.next_sibling.previous_sibling = child.previous_sibling if child.next_sibling
122
+ @last_child = child.previous_sibling if last_child == child
123
+ child.previous_sibling.next_sibling = child.next_sibling if child.previous_sibling
124
+ child.parent = child.previous_sibling = child.next_sibling = nil
125
+ child
126
+ end
127
+
128
+ # Clones self into a new node.
129
+ # @return [Gammo::Node]
130
+ # @!visibility private
131
+ def clone
132
+ self.class.new(tag: self.tag, data: self.data, attributes: self.attributes.dup)
133
+ end
134
+
135
+ # @!visibility private
136
+ def to_h
137
+ {
138
+ tag: tag,
139
+ data: data,
140
+ attributes: attributes,
141
+ type: self.class
142
+ }
143
+ end
144
+
145
+ private
146
+
147
+ # @!visibility private
148
+ def attached?(node)
149
+ node.parent || node.previous_sibling || node.next_sibling
150
+ end
151
+
152
+ # @!visibility private
153
+ def child?(node)
154
+ node.parent == self
155
+ end
156
+ end
157
+ end