nokogiri 1.5.10 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +278 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +74 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +944 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +305 -201
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +30 -19
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +808 -503
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +198 -186
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +162 -98
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -50
- data/lib/nokogiri/css/parser.rb +317 -286
- data/lib/nokogiri/css/parser.y +57 -43
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +105 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +137 -48
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +13 -5
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +118 -50
- data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -91
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +75 -33
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +157 -54
- data/lib/nokogiri/xml/document_fragment.rb +55 -8
- data/lib/nokogiri/xml/dtd.rb +15 -4
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +712 -431
- data/lib/nokogiri/xml/node_set.rb +140 -123
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +31 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +8 -8
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +239 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +37 -35
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +55 -58
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +307 -459
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -1,8 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module CSS
|
3
4
|
class XPathVisitor # :nodoc:
|
4
5
|
def visit_function node
|
5
|
-
# note that nth-child and nth-last-child are preprocessed in css/node.rb.
|
6
6
|
msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
|
7
7
|
return self.send(msg, node) if self.respond_to?(msg)
|
8
8
|
|
@@ -12,37 +12,51 @@ module Nokogiri
|
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
14
|
when /^eq\(/
|
15
|
-
"position()
|
16
|
-
when /^(nth|nth-of-type
|
17
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :
|
18
|
-
|
15
|
+
"position()=#{node.value[1]}"
|
16
|
+
when /^(nth|nth-of-type)\(/
|
17
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
18
|
+
nth(node.value[1])
|
19
19
|
else
|
20
|
-
"position()
|
20
|
+
"position()=#{node.value[1]}"
|
21
21
|
end
|
22
|
-
when /^
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :
|
24
|
-
|
22
|
+
when /^nth-child\(/
|
23
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
24
|
+
nth(node.value[1], :child => true)
|
25
|
+
else
|
26
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i-1}"
|
27
|
+
end
|
28
|
+
when /^nth-last-of-type\(/
|
29
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
30
|
+
nth(node.value[1], :last => true)
|
25
31
|
else
|
26
32
|
index = node.value[1].to_i - 1
|
27
|
-
index == 0 ? "position()
|
33
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
|
+
end
|
35
|
+
when /^nth-last-child\(/
|
36
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
37
|
+
nth(node.value[1], :last => true, :child => true)
|
38
|
+
else
|
39
|
+
"count(following-sibling::*)=#{node.value[1].to_i-1}"
|
28
40
|
end
|
29
41
|
when /^(first|first-of-type)\(/
|
30
|
-
"position()
|
42
|
+
"position()=1"
|
31
43
|
when /^(last|last-of-type)\(/
|
32
|
-
"position()
|
44
|
+
"position()=last()"
|
33
45
|
when /^contains\(/
|
34
|
-
"contains(
|
46
|
+
"contains(.,#{node.value[1]})"
|
35
47
|
when /^gt\(/
|
36
|
-
"position()
|
48
|
+
"position()>#{node.value[1]}"
|
37
49
|
when /^only-child\(/
|
38
|
-
"last()
|
50
|
+
"last()=1"
|
39
51
|
when /^comment\(/
|
40
52
|
"comment()"
|
41
53
|
when /^has\(/
|
42
|
-
node.value[1].
|
54
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
55
|
+
".#{"//" if !is_direct}#{node.value[1].accept(self)}"
|
43
56
|
else
|
57
|
+
# non-standard. this looks like a function call.
|
44
58
|
args = ['.'] + node.value[1..-1]
|
45
|
-
"#{node.value.first}#{args.join(',
|
59
|
+
"#{node.value.first}#{args.join(',')})"
|
46
60
|
end
|
47
61
|
end
|
48
62
|
|
@@ -57,18 +71,18 @@ module Nokogiri
|
|
57
71
|
|
58
72
|
def visit_id node
|
59
73
|
node.value.first =~ /^#(.*)$/
|
60
|
-
"@id
|
74
|
+
"@id='#{$1}'"
|
61
75
|
end
|
62
76
|
|
63
77
|
def visit_attribute_condition node
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
78
|
+
attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
|
79
|
+
''
|
80
|
+
else
|
81
|
+
'@'
|
82
|
+
end
|
69
83
|
attribute += node.value.first.accept(self)
|
70
84
|
|
71
|
-
#
|
85
|
+
# non-standard. attributes starting with '@'
|
72
86
|
attribute.gsub!(/^@@/, '@')
|
73
87
|
|
74
88
|
return attribute unless node.value.length == 3
|
@@ -76,22 +90,30 @@ module Nokogiri
|
|
76
90
|
value = node.value.last
|
77
91
|
value = "'#{value}'" if value !~ /^['"]/
|
78
92
|
|
93
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
94
|
+
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
95
|
+
str_value = value[1..-2]
|
96
|
+
if str_value.include?(value[0])
|
97
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
79
101
|
case node.value[1]
|
80
102
|
when :equal
|
81
|
-
attribute + "
|
103
|
+
attribute + "=" + "#{value}"
|
82
104
|
when :not_equal
|
83
|
-
attribute + "
|
105
|
+
attribute + "!=" + "#{value}"
|
84
106
|
when :substring_match
|
85
|
-
"contains(#{attribute}
|
107
|
+
"contains(#{attribute},#{value})"
|
86
108
|
when :prefix_match
|
87
|
-
"starts-with(#{attribute}
|
109
|
+
"starts-with(#{attribute},#{value})"
|
88
110
|
when :dash_match
|
89
|
-
"#{attribute}
|
111
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
90
112
|
when :includes
|
91
|
-
|
113
|
+
value = value[1..-2] # strip quotes
|
114
|
+
css_class(attribute, value)
|
92
115
|
when :suffix_match
|
93
|
-
"substring(#{attribute},
|
94
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
116
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
95
117
|
else
|
96
118
|
attribute + " #{node.value[1]} " + "#{value}"
|
97
119
|
end
|
@@ -105,11 +127,14 @@ module Nokogiri
|
|
105
127
|
return self.send(msg, node) if self.respond_to?(msg)
|
106
128
|
|
107
129
|
case node.value.first
|
108
|
-
when "first"
|
109
|
-
when "
|
110
|
-
when "
|
111
|
-
when "last-
|
112
|
-
when "
|
130
|
+
when "first" then "position()=1"
|
131
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
132
|
+
when "last" then "position()=last()"
|
133
|
+
when "last-child" then "count(following-sibling::*)=0"
|
134
|
+
when "first-of-type" then "position()=1"
|
135
|
+
when "last-of-type" then "position()=last()"
|
136
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
137
|
+
when "only-of-type" then "last()=1"
|
113
138
|
when "empty" then "not(node())"
|
114
139
|
when "parent" then "node()"
|
115
140
|
when "root" then "not(parent::*)"
|
@@ -120,11 +145,18 @@ module Nokogiri
|
|
120
145
|
end
|
121
146
|
|
122
147
|
def visit_class_condition node
|
123
|
-
"
|
148
|
+
css_class("@class", node.value.first)
|
149
|
+
end
|
150
|
+
|
151
|
+
def visit_combinator node
|
152
|
+
if is_of_type_pseudo_class?(node.value.last)
|
153
|
+
"#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
|
154
|
+
else
|
155
|
+
"#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
|
156
|
+
end
|
124
157
|
end
|
125
158
|
|
126
159
|
{
|
127
|
-
'combinator' => ' and ',
|
128
160
|
'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
|
129
161
|
'following_selector' => "/following-sibling::",
|
130
162
|
'descendant_selector' => '//',
|
@@ -150,22 +182,79 @@ module Nokogiri
|
|
150
182
|
node.accept(self)
|
151
183
|
end
|
152
184
|
|
153
|
-
|
154
|
-
|
185
|
+
private
|
186
|
+
|
187
|
+
def nth node, options={}
|
155
188
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
156
189
|
|
157
|
-
a = node.value
|
158
|
-
|
159
|
-
|
190
|
+
a, b = read_a_and_positive_b node.value
|
191
|
+
position = if options[:child]
|
192
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
193
|
+
else
|
194
|
+
options[:last] ? "(last()-position()+1)" : "position()"
|
195
|
+
end
|
160
196
|
|
161
|
-
if
|
162
|
-
|
197
|
+
if b.zero?
|
198
|
+
"(#{position} mod #{a})=0"
|
163
199
|
else
|
164
|
-
compare =
|
165
|
-
|
200
|
+
compare = a < 0 ? "<=" : ">="
|
201
|
+
if a.abs == 1
|
202
|
+
"#{position}#{compare}#{b}"
|
203
|
+
else
|
204
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
205
|
+
end
|
166
206
|
end
|
167
207
|
end
|
168
208
|
|
209
|
+
def read_a_and_positive_b values
|
210
|
+
op = values[2]
|
211
|
+
if op == "+"
|
212
|
+
a = values[0].to_i
|
213
|
+
b = values[3].to_i
|
214
|
+
elsif op == "-"
|
215
|
+
a = values[0].to_i
|
216
|
+
b = a - (values[3].to_i % a)
|
217
|
+
else
|
218
|
+
raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
|
219
|
+
end
|
220
|
+
[a, b]
|
221
|
+
end
|
222
|
+
|
223
|
+
def is_of_type_pseudo_class? node
|
224
|
+
if node.type==:PSEUDO_CLASS
|
225
|
+
if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
|
226
|
+
node.value[0].value[0]
|
227
|
+
else
|
228
|
+
node.value[0]
|
229
|
+
end =~ /(nth|first|last|only)-of-type(\()?/
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
# use only ordinary xpath functions
|
234
|
+
def css_class_standard(hay, needle)
|
235
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
236
|
+
end
|
237
|
+
|
238
|
+
# use the builtin implementation
|
239
|
+
def css_class_builtin(hay, needle)
|
240
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
241
|
+
end
|
242
|
+
|
243
|
+
alias_method :css_class, :css_class_standard
|
244
|
+
end
|
245
|
+
|
246
|
+
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
|
247
|
+
private
|
248
|
+
alias_method :css_class, :css_class_builtin
|
249
|
+
end
|
250
|
+
|
251
|
+
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
|
252
|
+
private
|
253
|
+
if Nokogiri.uses_libxml?
|
254
|
+
alias_method :css_class, :css_class_builtin
|
255
|
+
else
|
256
|
+
alias_method :css_class, :css_class_standard
|
257
|
+
end
|
169
258
|
end
|
170
259
|
end
|
171
260
|
end
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,27 +1,28 @@
|
|
1
|
-
|
2
|
-
require 'nokogiri/css/xpath_visitor'
|
3
|
-
x = $-w
|
4
|
-
$-w = false
|
5
|
-
require 'nokogiri/css/parser'
|
6
|
-
$-w = x
|
7
|
-
|
8
|
-
require 'nokogiri/css/tokenizer'
|
9
|
-
require 'nokogiri/css/syntax_error'
|
10
|
-
|
1
|
+
# frozen_string_literal: true
|
11
2
|
module Nokogiri
|
12
3
|
module CSS
|
13
4
|
class << self
|
14
5
|
###
|
15
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
16
|
-
def parse
|
17
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
18
9
|
end
|
19
10
|
|
20
11
|
###
|
21
12
|
# Get the XPath for +selector+.
|
22
|
-
def xpath_for
|
23
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
24
15
|
end
|
25
16
|
end
|
26
17
|
end
|
27
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
@@ -1,28 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module Decorators
|
3
4
|
###
|
4
5
|
# The Slop decorator implements method missing such that a methods may be
|
5
6
|
# used instead of XPath or CSS. See Nokogiri.Slop
|
6
7
|
module Slop
|
8
|
+
# The default XPath search context for Slop
|
9
|
+
XPATH_PREFIX = "./"
|
10
|
+
|
7
11
|
###
|
8
12
|
# look for node with +name+. See Nokogiri.Slop
|
9
13
|
def method_missing name, *args, &block
|
10
|
-
prefix = implied_xpath_context
|
11
|
-
|
12
14
|
if args.empty?
|
13
|
-
list = xpath("#{
|
15
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
14
16
|
elsif args.first.is_a? Hash
|
15
17
|
hash = args.first
|
16
18
|
if hash[:css]
|
17
19
|
list = css("#{name}#{hash[:css]}")
|
18
20
|
elsif hash[:xpath]
|
19
21
|
conds = Array(hash[:xpath]).join(' and ')
|
20
|
-
list = xpath("#{
|
22
|
+
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
21
23
|
end
|
22
24
|
else
|
23
25
|
CSS::Parser.without_cache do
|
24
26
|
list = xpath(
|
25
|
-
*CSS.xpath_for("#{name}#{args.first}", :prefix =>
|
27
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
|
26
28
|
)
|
27
29
|
end
|
28
30
|
end
|
@@ -30,6 +32,12 @@ module Nokogiri
|
|
30
32
|
super if list.empty?
|
31
33
|
list.length == 1 ? list.first : list
|
32
34
|
end
|
35
|
+
|
36
|
+
def respond_to_missing? name, include_private = false
|
37
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
38
|
+
|
39
|
+
!list.empty?
|
40
|
+
end
|
33
41
|
end
|
34
42
|
end
|
35
43
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# load the C or Java extension
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
|
6
|
+
::RUBY_VERSION =~ /(\d+\.\d+)/
|
7
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
|
+
rescue LoadError => e
|
9
|
+
if e.message =~ /GLIBC/
|
10
|
+
warn(<<~EOM)
|
11
|
+
|
12
|
+
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
|
13
|
+
|
14
|
+
#{e.message}
|
15
|
+
|
16
|
+
If that's the case, then please install Nokogiri via the `ruby` platform gem:
|
17
|
+
gem install nokogiri --platform=ruby
|
18
|
+
or:
|
19
|
+
bundle config set force_ruby_platform true
|
20
|
+
|
21
|
+
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
|
22
|
+
|
23
|
+
EOM
|
24
|
+
raise e
|
25
|
+
end
|
26
|
+
|
27
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
28
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
29
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see #2300)
|
30
|
+
require "nokogiri/nokogiri"
|
31
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'nokogiri/html/document_fragment'
|
4
|
-
require 'nokogiri/html/sax/parser_context'
|
5
|
-
require 'nokogiri/html/sax/parser'
|
6
|
-
require 'nokogiri/html/sax/push_parser'
|
7
|
-
require 'nokogiri/html/element_description'
|
8
|
-
require 'nokogiri/html/element_description_defaults'
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "html4"
|
9
3
|
|
10
4
|
module Nokogiri
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
18
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
19
15
|
module HTML
|
20
|
-
class
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
25
31
|
end
|
26
32
|
|
27
|
-
|
28
|
-
|
29
|
-
def fragment string, encoding = nil
|
30
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
31
35
|
end
|
32
|
-
end
|
33
36
|
|
34
|
-
|
35
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
36
41
|
end
|
37
42
|
end
|
@@ -1,5 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
|
-
module
|
3
|
+
module HTML4
|
3
4
|
###
|
4
5
|
# Nokogiri HTML builder is used for building HTML documents. It is very
|
5
6
|
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
@@ -11,7 +12,7 @@ module Nokogiri
|
|
11
12
|
# Create an HTML document with a body that has an onload attribute, and a
|
12
13
|
# span tag with a class of "bold" that has content of "Hello world".
|
13
14
|
#
|
14
|
-
# builder = Nokogiri::
|
15
|
+
# builder = Nokogiri::HTML4::Builder.new do |doc|
|
15
16
|
# doc.html {
|
16
17
|
# doc.body(:onload => 'some_func();') {
|
17
18
|
# doc.span.bold {
|