nokogiri 1.11.7-java → 1.12.0.rc1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/java/nokogiri/{HtmlDocument.java → Html4Document.java} +8 -22
- data/ext/java/nokogiri/{HtmlElementDescription.java → Html4ElementDescription.java} +6 -6
- data/ext/java/nokogiri/{HtmlEntityLookup.java → Html4EntityLookup.java} +5 -5
- data/ext/java/nokogiri/{HtmlSaxParserContext.java → Html4SaxParserContext.java} +13 -13
- data/ext/java/nokogiri/{HtmlSaxPushParser.java → Html4SaxPushParser.java} +14 -14
- data/ext/java/nokogiri/NokogiriService.java +20 -20
- data/ext/java/nokogiri/XmlAttr.java +2 -2
- data/ext/java/nokogiri/XmlDocument.java +14 -14
- data/ext/java/nokogiri/XmlElementContent.java +5 -5
- data/ext/java/nokogiri/XmlNode.java +74 -74
- data/ext/java/nokogiri/XmlSaxPushParser.java +2 -2
- data/ext/java/nokogiri/XmlSyntaxError.java +1 -1
- data/ext/java/nokogiri/XmlXpathContext.java +9 -9
- data/ext/java/nokogiri/XsltStylesheet.java +8 -8
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +4 -4
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +57 -57
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +24 -24
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +105 -105
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +30 -30
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +87 -87
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +95 -95
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +2 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- metadata +42 -42
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
@@ -64,7 +64,6 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
64
64
|
VALUE errstr, exception;
|
65
65
|
xsltStylesheetPtr ss ;
|
66
66
|
Data_Get_Struct(xmldocobj, xmlDoc, xml);
|
67
|
-
exsltRegisterAll();
|
68
67
|
|
69
68
|
errstr = rb_str_new(0, 0);
|
70
69
|
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
@@ -257,6 +256,8 @@ noko_init_xslt_stylesheet()
|
|
257
256
|
|
258
257
|
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
259
258
|
|
259
|
+
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
260
|
+
|
260
261
|
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
261
262
|
rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1);
|
262
263
|
rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1);
|
@@ -0,0 +1,63 @@
|
|
1
|
+
## Gumbo 0.10.1 (2015-04-30)
|
2
|
+
|
3
|
+
Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage.
|
4
|
+
|
5
|
+
## Gumbo 0.10.0 (2015-04-30)
|
6
|
+
|
7
|
+
* Full support for `<template>` tag (kevinhendricks, nostrademons).
|
8
|
+
* Some fixes for `<rtc>`/`<rt>` handling (kevinhendricks, vmg).
|
9
|
+
* All html5lib-trunk tests pass now! (kevinhendricks, vmg, nostrademons)
|
10
|
+
* Support for fragment parsing (vmg)
|
11
|
+
* A couple additional example programs (kevinhendricks)
|
12
|
+
* Performance improvements totaling an estimated 30-40% total improvement (vmg, nostrademons).
|
13
|
+
|
14
|
+
## Gumbo 0.9.4 (2015-04-30)
|
15
|
+
|
16
|
+
* Additional Visual Studio fixes (lowjoel, nostrademons)
|
17
|
+
* Fixed some unused variable warnings.
|
18
|
+
* Fix for glibtoolize vs. libtoolize build errors on Mac.
|
19
|
+
* Fixed `CDATA` end tag handling.
|
20
|
+
|
21
|
+
## Gumbo 0.9.3 (2015-02-17)
|
22
|
+
|
23
|
+
* Bugfix for `&AElig;` entities (rgrove)
|
24
|
+
* Fix `CDATA` handling; `CDATA` sections now generate a `GUMBO_NODE_CDATA` node rather
|
25
|
+
than plain text.
|
26
|
+
* Fix `get_title example` to handle whitespace nodes (gsnedders)
|
27
|
+
* Visual Studio compilation fixes (fishioon)
|
28
|
+
* Take the namespace into account when determining whether a node matches a
|
29
|
+
certain tag (aroben)
|
30
|
+
* Replace the varargs tag functions with a tagset bytevector, for a 20-30%
|
31
|
+
speedup in overall parse time (kevinhendricks, vmg)
|
32
|
+
* Add MacOS X support to Travis CI, and fix the deployment/DLL issues this
|
33
|
+
uncovered (nostrademons, kevinhendricks, vmg)
|
34
|
+
|
35
|
+
## Gumbo 0.9.2 (2014-09-21)
|
36
|
+
|
37
|
+
* Performance improvements: Ragel-based char ref decoder and DFA-based UTF8
|
38
|
+
decoder, totaling speedups of up to 300%.
|
39
|
+
* Added benchmarking program and some sample data.
|
40
|
+
* Fixed a compiler error under Visual Studio.
|
41
|
+
* Fix an error in the ctypes bindings that could lead to memory corruption in
|
42
|
+
the Python bindings.
|
43
|
+
* Fix duplicate attributes when parsing `<isindex>` tags.
|
44
|
+
* Don't leave semicolons behind when consuming entity references (rgrove)
|
45
|
+
* Internally rename some functions in preparation for an amalgamation file
|
46
|
+
(jdeng)
|
47
|
+
* Add proper cflags for gyp builds (skabbes)
|
48
|
+
|
49
|
+
## Gumbo 0.9.1 (2014-08-07)
|
50
|
+
|
51
|
+
* First version listed on PyPi.
|
52
|
+
* Autotools files excluded from GitHub and generated via autogen.sh. (endgame)
|
53
|
+
* Numerous compiler warnings fixed. (bnoordhuis, craigbarnes)
|
54
|
+
* Google security audit passed.
|
55
|
+
* Gyp support (tfarina)
|
56
|
+
* Naming convention for structs changed to avoid C reserved words.
|
57
|
+
* Fix several integer and buffer overflows (Maxime2)
|
58
|
+
* Some Visual Studio compiler support (bugparty)
|
59
|
+
* Python3 compatibility for the ctypes bindings.
|
60
|
+
|
61
|
+
## Gumbo 0.9.0 (2013-08-13)
|
62
|
+
|
63
|
+
* Initial release open-sourced by Google.
|
@@ -0,0 +1,101 @@
|
|
1
|
+
.PHONY: all clean check coverage
|
2
|
+
|
3
|
+
gumbo_objs := $(patsubst %.c,build/%.o,$(wildcard src/*.c))
|
4
|
+
test_objs := $(patsubst %.cc,build/%.o,$(wildcard test/*.cc))
|
5
|
+
gtest_lib := googletest/make/gtest_main.a
|
6
|
+
|
7
|
+
# make SANITIZEFLAGS='-fsanitize=undefined -fsanitize=address'
|
8
|
+
SANITIZEFLAGS :=
|
9
|
+
CPPFLAGS := -Isrc
|
10
|
+
CFLAGS := -std=c99 -Os -Wall
|
11
|
+
CXXFLAGS := -isystem googletest/include -std=c++11 -Os -Wall
|
12
|
+
LDFLAGS := -pthread
|
13
|
+
|
14
|
+
all: check
|
15
|
+
|
16
|
+
src/%.c: src/%.rl
|
17
|
+
ragel -F1 -o $@ $<
|
18
|
+
|
19
|
+
build/src:
|
20
|
+
mkdir -p $@
|
21
|
+
|
22
|
+
build/test:
|
23
|
+
mkdir -p $@
|
24
|
+
|
25
|
+
build/src/%.o: src/%.c build/src/flags | build/src
|
26
|
+
$(CC) -MMD $(CPPFLAGS) $(CFLAGS) $(SANITIZEFLAGS) -c -o $@ $<
|
27
|
+
|
28
|
+
build/test/%.o: test/%.cc build/test/flags | build/test
|
29
|
+
$(CXX) -MMD $(CPPFLAGS) $(CXXFLAGS) $(SANITIZEFLAGS) -c -o $@ $<
|
30
|
+
|
31
|
+
build/run_tests: $(gumbo_objs) $(test_objs) $(gtest_lib)
|
32
|
+
$(CXX) -o $@ $+ $(LDFLAGS) $(SANITIZEFLAGS)
|
33
|
+
|
34
|
+
check: build/run_tests
|
35
|
+
./build/run_tests
|
36
|
+
|
37
|
+
coverage:
|
38
|
+
$(RM) build/{src,test}/*.gcda
|
39
|
+
$(RM) build/*.info
|
40
|
+
$(MAKE) CPPFLAGS='-Isrc -DNDEBUG=1' \
|
41
|
+
CFLAGS='-std=c99 --coverage -g -O0' \
|
42
|
+
CXXFLAGS='-isystem googletest/include -std=c++11 --coverage -g -O0' \
|
43
|
+
LDFLAGS='--coverage' \
|
44
|
+
build/run_tests
|
45
|
+
lcov --no-external \
|
46
|
+
--initial \
|
47
|
+
--capture \
|
48
|
+
--base-directory . \
|
49
|
+
--directory build \
|
50
|
+
--output-file build/coverage-pre.info
|
51
|
+
awk -F '[:,]' \
|
52
|
+
'/^SF:/ { delete defs } /^FN:/ { defs[$$2]=1 } /^DA:/ { if ($$3 == 0 && $$2 in defs) next } { print }' \
|
53
|
+
build/coverage-pre.info > build/coverage-initial.info
|
54
|
+
./build/run_tests
|
55
|
+
lcov --no-external \
|
56
|
+
--capture \
|
57
|
+
--base-directory . \
|
58
|
+
--directory build \
|
59
|
+
--rc lcov_branch_coverage=1 \
|
60
|
+
--output-file build/coverage-test.info
|
61
|
+
lcov --add-tracefile build/coverage-initial.info \
|
62
|
+
--add-tracefile build/coverage-test.info \
|
63
|
+
--rc lcov_branch_coverage=1 \
|
64
|
+
--output-file build/coverage.info
|
65
|
+
lcov --remove build/coverage.info '$(CURDIR)/googletest/*' \
|
66
|
+
--rc lcov_branch_coverage=1 \
|
67
|
+
--output-file build/coverage.info
|
68
|
+
genhtml --branch-coverage \
|
69
|
+
--output-directory build/coverage \
|
70
|
+
build/coverage.info
|
71
|
+
|
72
|
+
clean:
|
73
|
+
$(RM) -r build
|
74
|
+
|
75
|
+
build/src/flags: | build/src
|
76
|
+
@echo 'old_CC := $(CC)' > $@
|
77
|
+
@echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
|
78
|
+
@echo 'old_CFLAGS := $(CFLAGS)' >>$@
|
79
|
+
@echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
|
80
|
+
@echo 'old_LDFLAGS := $(LDFLAGS)' >> $@
|
81
|
+
|
82
|
+
build/test/flags: | build/test
|
83
|
+
@echo 'old_CXX := $(CXX)' > $@
|
84
|
+
@echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
|
85
|
+
@echo 'old_CXXFLAGS := $(CXXFLAGS)' >> $@
|
86
|
+
@echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
|
87
|
+
@echo 'old_LDFLAGS := $(LDFLAGS)' >> $@
|
88
|
+
|
89
|
+
ifeq (,$(filter clean coverage,$(MAKECMDGOALS)))
|
90
|
+
# Ensure that the flags are up to date.
|
91
|
+
-include build/src/flags build/test/flags
|
92
|
+
ifneq ($(old_CC) | $(old_CPPFLAGS) | $(old_CFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CC) | $(CPPFLAGS) | $(CFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
|
93
|
+
.PHONY: build/src/flags
|
94
|
+
endif
|
95
|
+
ifneq ($(old_CXX) | $(old_CPPFLAGS) | $(old_CXXFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CXX) | $(CPPFLAGS) | $(CXXFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
|
96
|
+
.PHONY: build/test/flags
|
97
|
+
endif
|
98
|
+
|
99
|
+
# Include dependencies.
|
100
|
+
-include $(test_objs:.o=.d) $(gumbo_objs:.o=.d)
|
101
|
+
endif
|
data/gumbo-parser/THANKS
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Gumbo HTML parser THANKS file
|
2
|
+
|
3
|
+
Gumbo was originally written by Jonathan Tang, but many people helped out through suggestions, question-answering, code reviews, bugfixes, and organizational support. Here is a list of these people. Help me keep it complete and exempt of errors.
|
4
|
+
|
5
|
+
Adam Barth
|
6
|
+
Adam Roben
|
7
|
+
Ben Noordhuis
|
8
|
+
Bowen Han
|
9
|
+
Constantinos Michael
|
10
|
+
Craig Barnes
|
11
|
+
Geoffrey Sneddon
|
12
|
+
Ian Hickson
|
13
|
+
Jack Deng
|
14
|
+
Joel Low
|
15
|
+
Jonathan Shneier
|
16
|
+
Kevin Hendricks
|
17
|
+
Mason Tang
|
18
|
+
Maxim Zakharov
|
19
|
+
Michal Zalewski
|
20
|
+
Neal Norwitz
|
21
|
+
Othar Hansson
|
22
|
+
Ryan Grove
|
23
|
+
Stefan Haustein
|
24
|
+
Steffen Meschkat
|
25
|
+
Steven Kabbes
|
26
|
+
Thiago Farina
|
27
|
+
Vicent Marti
|
data/lib/nokogiri.rb
CHANGED
@@ -2,38 +2,29 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
4
4
|
|
5
|
-
require
|
5
|
+
require "rbconfig"
|
6
6
|
|
7
7
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
8
|
-
|
8
|
+
require_relative "nokogiri/jruby/dependencies"
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
require 'nokogiri/version'
|
14
|
-
require 'nokogiri/syntax_error'
|
15
|
-
require 'nokogiri/xml'
|
16
|
-
require 'nokogiri/xslt'
|
17
|
-
require 'nokogiri/html'
|
18
|
-
require 'nokogiri/decorators/slop'
|
19
|
-
require 'nokogiri/css'
|
20
|
-
require 'nokogiri/html/builder'
|
11
|
+
require_relative "nokogiri/extension"
|
21
12
|
|
22
13
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
23
14
|
# correctly implemented CSS3 selector support as well as XPath 1.0
|
24
15
|
# support.
|
25
16
|
#
|
26
17
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
27
|
-
# Nokogiri::
|
18
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
28
19
|
#
|
29
20
|
# Here is an example:
|
30
21
|
#
|
31
22
|
# require 'nokogiri'
|
32
23
|
# require 'open-uri'
|
33
24
|
#
|
34
|
-
# # Get a Nokogiri::
|
25
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
35
26
|
#
|
36
|
-
# doc = Nokogiri::
|
27
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
37
28
|
#
|
38
29
|
# # Do funky things with it using Nokogiri::XML::Node methods...
|
39
30
|
#
|
@@ -49,27 +40,27 @@ module Nokogiri
|
|
49
40
|
class << self
|
50
41
|
###
|
51
42
|
# Parse an HTML or XML document. +string+ contains the document.
|
52
|
-
def parse
|
43
|
+
def parse(string, url = nil, encoding = nil, options = nil)
|
53
44
|
if string.respond_to?(:read) ||
|
54
45
|
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
55
46
|
# Expect an HTML indicator to appear within the first 512
|
56
47
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
57
48
|
# shouldn't be that long)
|
58
|
-
Nokogiri.
|
49
|
+
Nokogiri.HTML4(string, url, encoding,
|
59
50
|
options || XML::ParseOptions::DEFAULT_HTML)
|
60
51
|
else
|
61
52
|
Nokogiri.XML(string, url, encoding,
|
62
53
|
options || XML::ParseOptions::DEFAULT_XML)
|
63
|
-
end.tap
|
54
|
+
end.tap do |doc|
|
64
55
|
yield doc if block_given?
|
65
|
-
|
56
|
+
end
|
66
57
|
end
|
67
58
|
|
68
59
|
###
|
69
60
|
# Create a new Nokogiri::XML::DocumentFragment
|
70
|
-
def make
|
61
|
+
def make(input = nil, opts = {}, &blk)
|
71
62
|
if input
|
72
|
-
Nokogiri::
|
63
|
+
Nokogiri::HTML4.fragment(input).children.first
|
73
64
|
else
|
74
65
|
Nokogiri(&blk)
|
75
66
|
end
|
@@ -98,10 +89,10 @@ module Nokogiri
|
|
98
89
|
# Make sure to support some popular encoding aliases not known by
|
99
90
|
# all iconv implementations.
|
100
91
|
{
|
101
|
-
|
102
|
-
}.each
|
92
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
93
|
+
}.each do |alias_name, name|
|
103
94
|
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
104
|
-
|
95
|
+
end
|
105
96
|
end
|
106
97
|
end
|
107
98
|
|
@@ -109,15 +100,26 @@ module Nokogiri
|
|
109
100
|
end
|
110
101
|
|
111
102
|
###
|
112
|
-
# Parse a document contained in +args+. Nokogiri will try to guess what
|
113
|
-
#
|
114
|
-
# Nokogiri.parse
|
103
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
104
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
115
105
|
#
|
116
|
-
# To specify the type of document, use Nokogiri.XML or Nokogiri.
|
106
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
117
107
|
def Nokogiri(*args, &block)
|
118
108
|
if block_given?
|
119
|
-
Nokogiri::
|
109
|
+
Nokogiri::HTML4::Builder.new(&block).doc.root
|
120
110
|
else
|
121
111
|
Nokogiri.parse(*args)
|
122
112
|
end
|
123
113
|
end
|
114
|
+
|
115
|
+
require_relative "nokogiri/version"
|
116
|
+
require_relative "nokogiri/syntax_error"
|
117
|
+
require_relative "nokogiri/xml"
|
118
|
+
require_relative "nokogiri/xslt"
|
119
|
+
require_relative "nokogiri/html4"
|
120
|
+
require_relative "nokogiri/html"
|
121
|
+
require_relative "nokogiri/decorators/slop"
|
122
|
+
require_relative "nokogiri/css"
|
123
|
+
require_relative "nokogiri/html4/builder"
|
124
|
+
|
125
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,28 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/css/node'
|
3
|
-
require 'nokogiri/css/xpath_visitor'
|
4
|
-
x = $-w
|
5
|
-
$-w = false
|
6
|
-
require 'nokogiri/css/parser'
|
7
|
-
$-w = x
|
8
|
-
|
9
|
-
require 'nokogiri/css/tokenizer'
|
10
|
-
require 'nokogiri/css/syntax_error'
|
11
|
-
|
12
2
|
module Nokogiri
|
13
3
|
module CSS
|
14
4
|
class << self
|
15
5
|
###
|
16
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
17
|
-
def parse
|
18
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
19
9
|
end
|
20
10
|
|
21
11
|
###
|
22
12
|
# Get the XPath for +selector+.
|
23
|
-
def xpath_for
|
24
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
25
15
|
end
|
26
16
|
end
|
27
17
|
end
|
28
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
data/lib/nokogiri/css/parser.rb
CHANGED
data/lib/nokogiri/css/parser.y
CHANGED
data/lib/nokogiri/extension.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# load the C or Java extension
|
4
4
|
begin
|
5
5
|
::RUBY_VERSION =~ /(\d+\.\d+)/
|
6
|
-
|
6
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
7
7
|
rescue LoadError => e
|
8
8
|
if e.message =~ /GLIBC/
|
9
9
|
warn(<<~EOM)
|
@@ -22,5 +22,5 @@ rescue LoadError => e
|
|
22
22
|
EOM
|
23
23
|
raise e
|
24
24
|
end
|
25
|
-
|
25
|
+
require_relative "nokogiri"
|
26
26
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,38 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri/html/document'
|
4
|
-
require 'nokogiri/html/document_fragment'
|
5
|
-
require 'nokogiri/html/sax/parser_context'
|
6
|
-
require 'nokogiri/html/sax/parser'
|
7
|
-
require 'nokogiri/html/sax/push_parser'
|
8
|
-
require 'nokogiri/html/element_description'
|
9
|
-
require 'nokogiri/html/element_description_defaults'
|
2
|
+
require_relative "html4"
|
10
3
|
|
11
4
|
module Nokogiri
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
19
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
20
15
|
module HTML
|
21
|
-
class
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
def fragment string, encoding = nil
|
31
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
32
35
|
end
|
33
|
-
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
37
41
|
end
|
38
42
|
end
|