nokogiri 1.11.7 → 1.12.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/nokogiri.h +16 -9
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +7 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- metadata +100 -58
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
@@ -0,0 +1,30 @@
|
|
1
|
+
#ifndef GUMBO_UTIL_H_
|
2
|
+
#define GUMBO_UTIL_H_
|
3
|
+
|
4
|
+
#include <stdbool.h>
|
5
|
+
#include <stddef.h>
|
6
|
+
#include "macros.h"
|
7
|
+
|
8
|
+
#ifdef __cplusplus
|
9
|
+
extern "C" {
|
10
|
+
#endif
|
11
|
+
|
12
|
+
// Utility function for allocating & copying a null-terminated string into a
|
13
|
+
// freshly-allocated buffer. This is necessary for proper memory management; we
|
14
|
+
// have the convention that all const char* in parse tree structures are
|
15
|
+
// freshly-allocated, so if we didn't copy, we'd try to delete a literal string
|
16
|
+
// when the parse tree is destroyed.
|
17
|
+
char* gumbo_strdup(const char* str) XMALLOC NONNULL_ARGS;
|
18
|
+
|
19
|
+
void* gumbo_alloc(size_t size) XMALLOC;
|
20
|
+
void* gumbo_realloc(void* ptr, size_t size) RETURNS_NONNULL;
|
21
|
+
void gumbo_free(void* ptr);
|
22
|
+
|
23
|
+
// Debug wrapper for printf
|
24
|
+
void gumbo_debug(const char* format, ...) PRINTF(1);
|
25
|
+
|
26
|
+
#ifdef __cplusplus
|
27
|
+
}
|
28
|
+
#endif
|
29
|
+
|
30
|
+
#endif // GUMBO_UTIL_H_
|
@@ -0,0 +1,111 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2018 Craig Barnes.
|
3
|
+
Copyright 2010 Google Inc.
|
4
|
+
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
you may not use this file except in compliance with the License.
|
7
|
+
You may obtain a copy of the License at
|
8
|
+
|
9
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
See the License for the specific language governing permissions and
|
15
|
+
limitations under the License.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#include <assert.h>
|
19
|
+
#include <stdlib.h>
|
20
|
+
#include <string.h>
|
21
|
+
#include "vector.h"
|
22
|
+
#include "util.h"
|
23
|
+
|
24
|
+
void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector) {
|
25
|
+
vector->length = 0;
|
26
|
+
vector->capacity = initial_capacity;
|
27
|
+
if (initial_capacity > 0) {
|
28
|
+
vector->data = gumbo_alloc(sizeof(void*) * initial_capacity);
|
29
|
+
} else {
|
30
|
+
vector->data = NULL;
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
void gumbo_vector_destroy(GumboVector* vector) {
|
35
|
+
if (vector->capacity > 0) {
|
36
|
+
gumbo_free(vector->data);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static void enlarge_vector_if_full(GumboVector* vector) {
|
41
|
+
if (vector->length >= vector->capacity) {
|
42
|
+
if (vector->capacity) {
|
43
|
+
vector->capacity *= 2;
|
44
|
+
size_t num_bytes = sizeof(void*) * vector->capacity;
|
45
|
+
vector->data = gumbo_realloc(vector->data, num_bytes);
|
46
|
+
} else {
|
47
|
+
// 0-capacity vector; no previous array to deallocate.
|
48
|
+
vector->capacity = 2;
|
49
|
+
vector->data = gumbo_alloc(sizeof(void*) * vector->capacity);
|
50
|
+
}
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
void gumbo_vector_add(void* element, GumboVector* vector) {
|
55
|
+
enlarge_vector_if_full(vector);
|
56
|
+
assert(vector->data);
|
57
|
+
assert(vector->length < vector->capacity);
|
58
|
+
vector->data[vector->length++] = element;
|
59
|
+
}
|
60
|
+
|
61
|
+
void* gumbo_vector_pop(GumboVector* vector) {
|
62
|
+
if (vector->length == 0) {
|
63
|
+
return NULL;
|
64
|
+
}
|
65
|
+
return vector->data[--vector->length];
|
66
|
+
}
|
67
|
+
|
68
|
+
int gumbo_vector_index_of(GumboVector* vector, const void* element) {
|
69
|
+
for (unsigned int i = 0; i < vector->length; ++i) {
|
70
|
+
if (vector->data[i] == element) {
|
71
|
+
return i;
|
72
|
+
}
|
73
|
+
}
|
74
|
+
return -1;
|
75
|
+
}
|
76
|
+
|
77
|
+
void gumbo_vector_insert_at (
|
78
|
+
void* element,
|
79
|
+
unsigned int index,
|
80
|
+
GumboVector* vector
|
81
|
+
) {
|
82
|
+
assert(index <= vector->length);
|
83
|
+
enlarge_vector_if_full(vector);
|
84
|
+
++vector->length;
|
85
|
+
memmove (
|
86
|
+
&vector->data[index + 1],
|
87
|
+
&vector->data[index],
|
88
|
+
sizeof(void*) * (vector->length - index - 1)
|
89
|
+
);
|
90
|
+
vector->data[index] = element;
|
91
|
+
}
|
92
|
+
|
93
|
+
void gumbo_vector_remove(void* node, GumboVector* vector) {
|
94
|
+
int index = gumbo_vector_index_of(vector, node);
|
95
|
+
if (index == -1) {
|
96
|
+
return;
|
97
|
+
}
|
98
|
+
gumbo_vector_remove_at(index, vector);
|
99
|
+
}
|
100
|
+
|
101
|
+
void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector) {
|
102
|
+
assert(index < vector->length);
|
103
|
+
void* result = vector->data[index];
|
104
|
+
memmove (
|
105
|
+
&vector->data[index],
|
106
|
+
&vector->data[index + 1],
|
107
|
+
sizeof(void*) * (vector->length - index - 1)
|
108
|
+
);
|
109
|
+
--vector->length;
|
110
|
+
return result;
|
111
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#ifndef GUMBO_VECTOR_H_
|
2
|
+
#define GUMBO_VECTOR_H_
|
3
|
+
|
4
|
+
#include "gumbo.h"
|
5
|
+
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
9
|
+
|
10
|
+
// Initializes a new GumboVector with the specified initial capacity.
|
11
|
+
void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector);
|
12
|
+
|
13
|
+
// Frees the memory used by a GumboVector. Does not free the contained
|
14
|
+
// pointers.
|
15
|
+
void gumbo_vector_destroy(GumboVector* vector);
|
16
|
+
|
17
|
+
// Adds a new element to a GumboVector.
|
18
|
+
void gumbo_vector_add(void* element, GumboVector* vector);
|
19
|
+
|
20
|
+
// Removes and returns the element most recently added to the GumboVector.
|
21
|
+
// Ownership is transferred to caller. Capacity is unchanged. If the vector is
|
22
|
+
// empty, NULL is returned.
|
23
|
+
void* gumbo_vector_pop(GumboVector* vector);
|
24
|
+
|
25
|
+
// Inserts an element at a specific index. This is potentially O(N) time, but
|
26
|
+
// is necessary for some of the spec's behavior.
|
27
|
+
void gumbo_vector_insert_at (
|
28
|
+
void* element,
|
29
|
+
unsigned int index,
|
30
|
+
GumboVector* vector
|
31
|
+
);
|
32
|
+
|
33
|
+
// Removes an element from the vector, or does nothing if the element is not in
|
34
|
+
// the vector.
|
35
|
+
void gumbo_vector_remove(void* element, GumboVector* vector);
|
36
|
+
|
37
|
+
// Removes and returns an element at a specific index. Note that this is
|
38
|
+
// potentially O(N) time and should be used sparingly.
|
39
|
+
void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector);
|
40
|
+
|
41
|
+
#ifdef __cplusplus
|
42
|
+
}
|
43
|
+
#endif
|
44
|
+
|
45
|
+
#endif // GUMBO_VECTOR_H_
|
data/lib/nokogiri.rb
CHANGED
@@ -2,38 +2,29 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
4
4
|
|
5
|
-
require
|
5
|
+
require "rbconfig"
|
6
6
|
|
7
7
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
8
|
-
|
8
|
+
require_relative "nokogiri/jruby/dependencies"
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
require 'nokogiri/version'
|
14
|
-
require 'nokogiri/syntax_error'
|
15
|
-
require 'nokogiri/xml'
|
16
|
-
require 'nokogiri/xslt'
|
17
|
-
require 'nokogiri/html'
|
18
|
-
require 'nokogiri/decorators/slop'
|
19
|
-
require 'nokogiri/css'
|
20
|
-
require 'nokogiri/html/builder'
|
11
|
+
require_relative "nokogiri/extension"
|
21
12
|
|
22
13
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
23
14
|
# correctly implemented CSS3 selector support as well as XPath 1.0
|
24
15
|
# support.
|
25
16
|
#
|
26
17
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
27
|
-
# Nokogiri::
|
18
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
28
19
|
#
|
29
20
|
# Here is an example:
|
30
21
|
#
|
31
22
|
# require 'nokogiri'
|
32
23
|
# require 'open-uri'
|
33
24
|
#
|
34
|
-
# # Get a Nokogiri::
|
25
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
35
26
|
#
|
36
|
-
# doc = Nokogiri::
|
27
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
37
28
|
#
|
38
29
|
# # Do funky things with it using Nokogiri::XML::Node methods...
|
39
30
|
#
|
@@ -49,27 +40,27 @@ module Nokogiri
|
|
49
40
|
class << self
|
50
41
|
###
|
51
42
|
# Parse an HTML or XML document. +string+ contains the document.
|
52
|
-
def parse
|
43
|
+
def parse(string, url = nil, encoding = nil, options = nil)
|
53
44
|
if string.respond_to?(:read) ||
|
54
45
|
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
55
46
|
# Expect an HTML indicator to appear within the first 512
|
56
47
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
57
48
|
# shouldn't be that long)
|
58
|
-
Nokogiri.
|
49
|
+
Nokogiri.HTML4(string, url, encoding,
|
59
50
|
options || XML::ParseOptions::DEFAULT_HTML)
|
60
51
|
else
|
61
52
|
Nokogiri.XML(string, url, encoding,
|
62
53
|
options || XML::ParseOptions::DEFAULT_XML)
|
63
|
-
end.tap
|
54
|
+
end.tap do |doc|
|
64
55
|
yield doc if block_given?
|
65
|
-
|
56
|
+
end
|
66
57
|
end
|
67
58
|
|
68
59
|
###
|
69
60
|
# Create a new Nokogiri::XML::DocumentFragment
|
70
|
-
def make
|
61
|
+
def make(input = nil, opts = {}, &blk)
|
71
62
|
if input
|
72
|
-
Nokogiri::
|
63
|
+
Nokogiri::HTML4.fragment(input).children.first
|
73
64
|
else
|
74
65
|
Nokogiri(&blk)
|
75
66
|
end
|
@@ -98,10 +89,10 @@ module Nokogiri
|
|
98
89
|
# Make sure to support some popular encoding aliases not known by
|
99
90
|
# all iconv implementations.
|
100
91
|
{
|
101
|
-
|
102
|
-
}.each
|
92
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
93
|
+
}.each do |alias_name, name|
|
103
94
|
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
104
|
-
|
95
|
+
end
|
105
96
|
end
|
106
97
|
end
|
107
98
|
|
@@ -109,15 +100,26 @@ module Nokogiri
|
|
109
100
|
end
|
110
101
|
|
111
102
|
###
|
112
|
-
# Parse a document contained in +args+. Nokogiri will try to guess what
|
113
|
-
#
|
114
|
-
# Nokogiri.parse
|
103
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
104
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
115
105
|
#
|
116
|
-
# To specify the type of document, use Nokogiri.XML or Nokogiri.
|
106
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
117
107
|
def Nokogiri(*args, &block)
|
118
108
|
if block_given?
|
119
|
-
Nokogiri::
|
109
|
+
Nokogiri::HTML4::Builder.new(&block).doc.root
|
120
110
|
else
|
121
111
|
Nokogiri.parse(*args)
|
122
112
|
end
|
123
113
|
end
|
114
|
+
|
115
|
+
require_relative "nokogiri/version"
|
116
|
+
require_relative "nokogiri/syntax_error"
|
117
|
+
require_relative "nokogiri/xml"
|
118
|
+
require_relative "nokogiri/xslt"
|
119
|
+
require_relative "nokogiri/html4"
|
120
|
+
require_relative "nokogiri/html"
|
121
|
+
require_relative "nokogiri/decorators/slop"
|
122
|
+
require_relative "nokogiri/css"
|
123
|
+
require_relative "nokogiri/html4/builder"
|
124
|
+
|
125
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,28 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/css/node'
|
3
|
-
require 'nokogiri/css/xpath_visitor'
|
4
|
-
x = $-w
|
5
|
-
$-w = false
|
6
|
-
require 'nokogiri/css/parser'
|
7
|
-
$-w = x
|
8
|
-
|
9
|
-
require 'nokogiri/css/tokenizer'
|
10
|
-
require 'nokogiri/css/syntax_error'
|
11
|
-
|
12
2
|
module Nokogiri
|
13
3
|
module CSS
|
14
4
|
class << self
|
15
5
|
###
|
16
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
17
|
-
def parse
|
18
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
19
9
|
end
|
20
10
|
|
21
11
|
###
|
22
12
|
# Get the XPath for +selector+.
|
23
|
-
def xpath_for
|
24
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
25
15
|
end
|
26
16
|
end
|
27
17
|
end
|
28
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
data/lib/nokogiri/css/parser.rb
CHANGED
data/lib/nokogiri/css/parser.y
CHANGED
data/lib/nokogiri/extension.rb
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
# load the C or Java extension
|
4
4
|
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
5
6
|
::RUBY_VERSION =~ /(\d+\.\d+)/
|
6
|
-
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
7
8
|
rescue LoadError => e
|
8
9
|
if e.message =~ /GLIBC/
|
9
10
|
warn(<<~EOM)
|
@@ -22,5 +23,9 @@ rescue LoadError => e
|
|
22
23
|
EOM
|
23
24
|
raise e
|
24
25
|
end
|
25
|
-
|
26
|
+
|
27
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
28
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
29
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
30
|
+
require "nokogiri/nokogiri"
|
26
31
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,38 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri/html/document'
|
4
|
-
require 'nokogiri/html/document_fragment'
|
5
|
-
require 'nokogiri/html/sax/parser_context'
|
6
|
-
require 'nokogiri/html/sax/parser'
|
7
|
-
require 'nokogiri/html/sax/push_parser'
|
8
|
-
require 'nokogiri/html/element_description'
|
9
|
-
require 'nokogiri/html/element_description_defaults'
|
2
|
+
require_relative "html4"
|
10
3
|
|
11
4
|
module Nokogiri
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
19
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
20
15
|
module HTML
|
21
|
-
class
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
def fragment string, encoding = nil
|
31
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
32
35
|
end
|
33
|
-
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
37
41
|
end
|
38
42
|
end
|