nokogiri 1.5.10 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +73 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +956 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +327 -288
- data/lib/nokogiri/css/parser.y +67 -45
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +7 -6
- data/lib/nokogiri/css/xpath_visitor.rb +263 -75
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -90
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +259 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +18 -16
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +382 -460
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
data/lib/nokogiri/css/parser.y
CHANGED
@@ -10,13 +10,12 @@ rule
|
|
10
10
|
result = [val.first, val.last].flatten
|
11
11
|
}
|
12
12
|
| prefixless_combinator_selector { result = val.flatten }
|
13
|
-
| simple_selector_1toN { result = val.flatten }
|
13
|
+
| optional_S simple_selector_1toN { result = [val.last].flatten }
|
14
14
|
;
|
15
15
|
combinator
|
16
16
|
: PLUS { result = :DIRECT_ADJACENT_SELECTOR }
|
17
17
|
| GREATER { result = :CHILD_SELECTOR }
|
18
18
|
| TILDE { result = :FOLLOWING_SELECTOR }
|
19
|
-
| S { result = :DESCENDANT_SELECTOR }
|
20
19
|
| DOUBLESLASH { result = :DESCENDANT_SELECTOR }
|
21
20
|
| SLASH { result = :CHILD_SELECTOR }
|
22
21
|
;
|
@@ -28,17 +27,6 @@ rule
|
|
28
27
|
Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
|
29
28
|
end
|
30
29
|
}
|
31
|
-
| element_name hcap_1toN negation {
|
32
|
-
result = Node.new(:CONDITIONAL_SELECTOR,
|
33
|
-
[
|
34
|
-
val.first,
|
35
|
-
Node.new(:COMBINATOR, [val[1], val.last])
|
36
|
-
]
|
37
|
-
)
|
38
|
-
}
|
39
|
-
| element_name negation {
|
40
|
-
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
41
|
-
}
|
42
30
|
| function
|
43
31
|
| function pseudo {
|
44
32
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
@@ -46,14 +34,6 @@ rule
|
|
46
34
|
| function attrib {
|
47
35
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
48
36
|
}
|
49
|
-
| hcap_1toN negation {
|
50
|
-
result = Node.new(:CONDITIONAL_SELECTOR,
|
51
|
-
[
|
52
|
-
Node.new(:ELEMENT_NAME, ['*']),
|
53
|
-
Node.new(:COMBINATOR, val)
|
54
|
-
]
|
55
|
-
)
|
56
|
-
}
|
57
37
|
| hcap_1toN {
|
58
38
|
result = Node.new(:CONDITIONAL_SELECTOR,
|
59
39
|
[Node.new(:ELEMENT_NAME, ['*']), val.first]
|
@@ -69,10 +49,13 @@ rule
|
|
69
49
|
: simple_selector combinator simple_selector_1toN {
|
70
50
|
result = Node.new(val[1], [val.first, val.last])
|
71
51
|
}
|
52
|
+
| simple_selector S simple_selector_1toN {
|
53
|
+
result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
|
54
|
+
}
|
72
55
|
| simple_selector
|
73
56
|
;
|
74
57
|
class
|
75
|
-
: '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
|
58
|
+
: '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
|
76
59
|
;
|
77
60
|
element_name
|
78
61
|
: namespaced_ident
|
@@ -105,7 +88,7 @@ rule
|
|
105
88
|
)
|
106
89
|
}
|
107
90
|
| LSQUARE NUMBER RSQUARE {
|
108
|
-
#
|
91
|
+
# non-standard, from hpricot
|
109
92
|
result = Node.new(:PSEUDO_CLASS,
|
110
93
|
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
|
111
94
|
)
|
@@ -113,14 +96,14 @@ rule
|
|
113
96
|
;
|
114
97
|
attrib_name
|
115
98
|
: namespace '|' IDENT {
|
116
|
-
result = Node.new(:
|
99
|
+
result = Node.new(:ATTRIB_NAME,
|
117
100
|
[[val.first, val.last].compact.join(':')]
|
118
101
|
)
|
119
102
|
}
|
120
103
|
| IDENT {
|
121
104
|
# Default namespace is not applied to attributes.
|
122
105
|
# So we don't add prefix "xmlns:" as in namespaced_ident.
|
123
|
-
result = Node.new(:
|
106
|
+
result = Node.new(:ATTRIB_NAME, [val.first])
|
124
107
|
}
|
125
108
|
;
|
126
109
|
function
|
@@ -130,7 +113,7 @@ rule
|
|
130
113
|
| FUNCTION expr RPAREN {
|
131
114
|
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
132
115
|
}
|
133
|
-
| FUNCTION
|
116
|
+
| FUNCTION nth RPAREN {
|
134
117
|
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
135
118
|
}
|
136
119
|
| NOT expr RPAREN {
|
@@ -148,14 +131,15 @@ rule
|
|
148
131
|
| STRING
|
149
132
|
| IDENT # even, odd
|
150
133
|
{
|
151
|
-
|
152
|
-
|
153
|
-
result = Node.new(:
|
154
|
-
|
155
|
-
|
156
|
-
|
134
|
+
case val[0]
|
135
|
+
when 'even'
|
136
|
+
result = Node.new(:NTH, ['2','n','+','0'])
|
137
|
+
when 'odd'
|
138
|
+
result = Node.new(:NTH, ['2','n','+','1'])
|
139
|
+
when 'n'
|
140
|
+
result = Node.new(:NTH, ['1','n','+','0'])
|
157
141
|
else
|
158
|
-
#
|
142
|
+
# non-standard to support custom functions:
|
159
143
|
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
|
160
144
|
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
|
161
145
|
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
|
@@ -163,11 +147,11 @@ rule
|
|
163
147
|
end
|
164
148
|
}
|
165
149
|
;
|
166
|
-
|
150
|
+
nth
|
167
151
|
: NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
|
168
152
|
{
|
169
153
|
if val[1] == 'n'
|
170
|
-
result = Node.new(:
|
154
|
+
result = Node.new(:NTH, val)
|
171
155
|
else
|
172
156
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
173
157
|
end
|
@@ -175,21 +159,27 @@ rule
|
|
175
159
|
| IDENT PLUS NUMBER { # n+3, -n+3
|
176
160
|
if val[0] == 'n'
|
177
161
|
val.unshift("1")
|
178
|
-
result = Node.new(:
|
162
|
+
result = Node.new(:NTH, val)
|
179
163
|
elsif val[0] == '-n'
|
180
164
|
val[0] = 'n'
|
181
165
|
val.unshift("-1")
|
182
|
-
result = Node.new(:
|
166
|
+
result = Node.new(:NTH, val)
|
183
167
|
else
|
184
168
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
185
169
|
end
|
186
170
|
}
|
187
|
-
| NUMBER IDENT
|
188
|
-
|
189
|
-
if
|
171
|
+
| NUMBER IDENT { # 5n, -5n, 10n-1
|
172
|
+
n = val[1]
|
173
|
+
if n[0, 2] == 'n-'
|
174
|
+
val[1] = 'n'
|
175
|
+
val << "-"
|
176
|
+
# b is contained in n as n is the string "n-b"
|
177
|
+
val << n[2, n.size]
|
178
|
+
result = Node.new(:NTH, val)
|
179
|
+
elsif n == 'n'
|
190
180
|
val << "+"
|
191
181
|
val << "0"
|
192
|
-
result = Node.new(:
|
182
|
+
result = Node.new(:NTH, val)
|
193
183
|
else
|
194
184
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
195
185
|
end
|
@@ -218,17 +208,22 @@ rule
|
|
218
208
|
| pseudo hcap_1toN {
|
219
209
|
result = Node.new(:COMBINATOR, val)
|
220
210
|
}
|
211
|
+
| negation hcap_1toN {
|
212
|
+
result = Node.new(:COMBINATOR, val)
|
213
|
+
}
|
221
214
|
| attribute_id
|
222
215
|
| class
|
223
216
|
| attrib
|
224
217
|
| pseudo
|
218
|
+
| negation
|
225
219
|
;
|
226
220
|
attribute_id
|
227
|
-
: HASH { result = Node.new(:ID, val) }
|
221
|
+
: HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
|
228
222
|
;
|
229
223
|
attrib_val_0or1
|
230
|
-
: eql_incl_dash IDENT { result = [val.first, val[1]] }
|
231
|
-
| eql_incl_dash STRING { result = [val.first, val[1]] }
|
224
|
+
: eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
|
225
|
+
| eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
|
226
|
+
| eql_incl_dash NUMBER { result = [val.first, val[1]] }
|
232
227
|
|
|
233
228
|
;
|
234
229
|
eql_incl_dash
|
@@ -250,9 +245,36 @@ rule
|
|
250
245
|
| element_name hcap_1toN
|
251
246
|
| hcap_1toN
|
252
247
|
;
|
248
|
+
optional_S
|
249
|
+
: S
|
250
|
+
|
|
251
|
+
;
|
253
252
|
end
|
254
253
|
|
255
254
|
---- header
|
256
255
|
|
257
|
-
|
256
|
+
require_relative "parser_extras"
|
258
257
|
|
258
|
+
module Nokogiri
|
259
|
+
module CSS
|
260
|
+
# :nodoc: all
|
261
|
+
class Parser < Racc::Parser
|
262
|
+
end
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
---- inner
|
267
|
+
|
268
|
+
def unescape_css_identifier(identifier)
|
269
|
+
identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
|
270
|
+
end
|
271
|
+
|
272
|
+
def unescape_css_string(str)
|
273
|
+
str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
|
274
|
+
if $1=="\n"
|
275
|
+
''
|
276
|
+
else
|
277
|
+
$1 || [$2.hex].pack('U')
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end
|
@@ -1,64 +1,68 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "thread"
|
2
4
|
|
3
5
|
module Nokogiri
|
4
6
|
module CSS
|
5
|
-
class Parser < Racc::Parser
|
6
|
-
|
7
|
-
|
8
|
-
@
|
7
|
+
class Parser < Racc::Parser # :nodoc:
|
8
|
+
CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
|
9
|
+
|
10
|
+
@cache = {}
|
11
|
+
@mutex = Mutex.new
|
9
12
|
|
10
13
|
class << self
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
# Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
|
15
|
+
def cache_on?
|
16
|
+
!Thread.current[CACHE_SWITCH_NAME]
|
17
|
+
end
|
18
|
+
|
19
|
+
# Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
|
20
|
+
def set_cache(value) # rubocop:disable Naming/AccessorMethodName
|
21
|
+
Thread.current[CACHE_SWITCH_NAME] = !value
|
22
|
+
end
|
15
23
|
|
16
24
|
# Get the css selector in +string+ from the cache
|
17
|
-
def []
|
18
|
-
return unless
|
25
|
+
def [](string)
|
26
|
+
return nil unless cache_on?
|
19
27
|
@mutex.synchronize { @cache[string] }
|
20
28
|
end
|
21
29
|
|
22
30
|
# Set the css selector in +string+ in the cache to +value+
|
23
|
-
def []=
|
24
|
-
return value unless
|
31
|
+
def []=(string, value)
|
32
|
+
return value unless cache_on?
|
25
33
|
@mutex.synchronize { @cache[string] = value }
|
26
34
|
end
|
27
35
|
|
28
36
|
# Clear the cache
|
29
|
-
def clear_cache
|
30
|
-
@mutex.synchronize
|
37
|
+
def clear_cache(create_new_object = false)
|
38
|
+
@mutex.synchronize do
|
39
|
+
if create_new_object
|
40
|
+
@cache = {}
|
41
|
+
else
|
42
|
+
@cache.clear
|
43
|
+
end
|
44
|
+
end
|
31
45
|
end
|
32
46
|
|
33
47
|
# Execute +block+ without cache
|
34
|
-
def without_cache
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
###
|
42
|
-
# Parse this CSS selector in +selector+. Returns an AST.
|
43
|
-
def parse selector
|
44
|
-
@warned ||= false
|
45
|
-
unless @warned
|
46
|
-
$stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
|
47
|
-
@warned = true
|
48
|
-
end
|
49
|
-
new.parse selector
|
48
|
+
def without_cache(&block)
|
49
|
+
original_cache_setting = cache_on?
|
50
|
+
set_cache(false)
|
51
|
+
yield
|
52
|
+
ensure
|
53
|
+
set_cache(original_cache_setting)
|
50
54
|
end
|
51
55
|
end
|
52
56
|
|
53
57
|
# Create a new CSS parser with respect to +namespaces+
|
54
|
-
def initialize
|
55
|
-
@tokenizer
|
58
|
+
def initialize(namespaces = {})
|
59
|
+
@tokenizer = Tokenizer.new
|
56
60
|
@namespaces = namespaces
|
57
61
|
super()
|
58
62
|
end
|
59
63
|
|
60
|
-
def parse
|
61
|
-
@tokenizer.scan_setup
|
64
|
+
def parse(string)
|
65
|
+
@tokenizer.scan_setup(string)
|
62
66
|
do_parse
|
63
67
|
end
|
64
68
|
|
@@ -67,24 +71,23 @@ module Nokogiri
|
|
67
71
|
end
|
68
72
|
|
69
73
|
# Get the xpath for +string+ using +options+
|
70
|
-
def xpath_for
|
71
|
-
key =
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
args = [
|
76
|
-
options[:prefix] || '//',
|
77
|
-
options[:visitor] || XPathVisitor.new
|
78
|
-
]
|
79
|
-
self.class[key] = parse(string).map { |ast|
|
80
|
-
ast.to_xpath(*args)
|
81
|
-
}
|
74
|
+
def xpath_for(string, prefix, visitor)
|
75
|
+
key = cache_key(string, prefix, visitor)
|
76
|
+
self.class[key] ||= parse(string).map do |ast|
|
77
|
+
ast.to_xpath(prefix, visitor)
|
78
|
+
end
|
82
79
|
end
|
83
80
|
|
84
81
|
# On CSS parser error, raise an exception
|
85
|
-
def on_error
|
82
|
+
def on_error(error_token_id, error_value, value_stack)
|
86
83
|
after = value_stack.compact.last
|
87
|
-
raise SyntaxError
|
84
|
+
raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
|
85
|
+
end
|
86
|
+
|
87
|
+
def cache_key(query, prefix, visitor)
|
88
|
+
if self.class.cache_on?
|
89
|
+
[query, prefix, @namespaces, visitor.config]
|
90
|
+
end
|
88
91
|
end
|
89
92
|
end
|
90
93
|
end
|
@@ -1,151 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
#--
|
2
3
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by rex 1.0.
|
4
|
+
# This file is automatically generated by rex 1.0.7
|
4
5
|
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
6
|
#++
|
6
7
|
|
7
8
|
module Nokogiri
|
8
9
|
module CSS
|
9
|
-
|
10
|
-
|
10
|
+
# :nodoc: all
|
11
|
+
class Tokenizer
|
12
|
+
require 'strscan'
|
11
13
|
|
12
|
-
|
14
|
+
class ScanError < StandardError ; end
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
16
|
+
attr_reader :lineno
|
17
|
+
attr_reader :filename
|
18
|
+
attr_accessor :state
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
def scan_setup(str)
|
21
|
+
@ss = StringScanner.new(str)
|
22
|
+
@lineno = 1
|
23
|
+
@state = nil
|
24
|
+
end
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
-
|
26
|
+
def action
|
27
|
+
yield
|
28
|
+
end
|
27
29
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
def scan_str(str)
|
31
|
+
scan_setup(str)
|
32
|
+
do_parse
|
33
|
+
end
|
34
|
+
alias :scan :scan_str
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
def load_file( filename )
|
37
|
+
@filename = filename
|
38
|
+
File.open(filename, "r") do |f|
|
39
|
+
scan_setup(f.read)
|
40
|
+
end
|
41
|
+
end
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
def scan_file( filename )
|
44
|
+
load_file(filename)
|
45
|
+
do_parse
|
46
|
+
end
|
45
47
|
|
46
48
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
# skips empty actions
|
51
|
-
until token = _next_token or @ss.eos?; end
|
52
|
-
token
|
53
|
-
end
|
49
|
+
def next_token
|
50
|
+
return if @ss.eos?
|
54
51
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
when nil
|
60
|
-
case
|
61
|
-
when (text = @ss.scan(/has\([\s]*/))
|
62
|
-
action { [:HAS, text] }
|
52
|
+
# skips empty actions
|
53
|
+
until token = _next_token or @ss.eos?; end
|
54
|
+
token
|
55
|
+
end
|
63
56
|
|
64
|
-
|
65
|
-
|
57
|
+
def _next_token
|
58
|
+
text = @ss.peek(1)
|
59
|
+
@lineno += 1 if text == "\n"
|
60
|
+
token = case @state
|
61
|
+
when nil
|
62
|
+
case
|
63
|
+
when (text = @ss.scan(/has\([\s]*/))
|
64
|
+
action { [:HAS, text] }
|
66
65
|
|
67
|
-
|
68
|
-
|
66
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
67
|
+
action { [:FUNCTION, text] }
|
69
68
|
|
70
|
-
|
71
|
-
|
69
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
70
|
+
action { [:IDENT, text] }
|
72
71
|
|
73
|
-
|
74
|
-
|
72
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
73
|
+
action { [:HASH, text] }
|
75
74
|
|
76
|
-
|
77
|
-
|
75
|
+
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
76
|
+
action { [:INCLUDES, text] }
|
78
77
|
|
79
|
-
|
80
|
-
|
78
|
+
when (text = @ss.scan(/[\s]*\|=[\s]*/))
|
79
|
+
action { [:DASHMATCH, text] }
|
81
80
|
|
82
|
-
|
83
|
-
|
81
|
+
when (text = @ss.scan(/[\s]*\^=[\s]*/))
|
82
|
+
action { [:PREFIXMATCH, text] }
|
84
83
|
|
85
|
-
|
86
|
-
|
84
|
+
when (text = @ss.scan(/[\s]*\$=[\s]*/))
|
85
|
+
action { [:SUFFIXMATCH, text] }
|
87
86
|
|
88
|
-
|
89
|
-
|
87
|
+
when (text = @ss.scan(/[\s]*\*=[\s]*/))
|
88
|
+
action { [:SUBSTRINGMATCH, text] }
|
90
89
|
|
91
|
-
|
92
|
-
|
90
|
+
when (text = @ss.scan(/[\s]*!=[\s]*/))
|
91
|
+
action { [:NOT_EQUAL, text] }
|
93
92
|
|
94
|
-
|
95
|
-
|
93
|
+
when (text = @ss.scan(/[\s]*=[\s]*/))
|
94
|
+
action { [:EQUAL, text] }
|
96
95
|
|
97
|
-
|
98
|
-
|
96
|
+
when (text = @ss.scan(/[\s]*\)/))
|
97
|
+
action { [:RPAREN, text] }
|
99
98
|
|
100
|
-
|
101
|
-
|
99
|
+
when (text = @ss.scan(/\[[\s]*/))
|
100
|
+
action { [:LSQUARE, text] }
|
102
101
|
|
103
|
-
|
104
|
-
|
102
|
+
when (text = @ss.scan(/[\s]*\]/))
|
103
|
+
action { [:RSQUARE, text] }
|
105
104
|
|
106
|
-
|
107
|
-
|
105
|
+
when (text = @ss.scan(/[\s]*\+[\s]*/))
|
106
|
+
action { [:PLUS, text] }
|
108
107
|
|
109
|
-
|
110
|
-
|
108
|
+
when (text = @ss.scan(/[\s]*>[\s]*/))
|
109
|
+
action { [:GREATER, text] }
|
111
110
|
|
112
|
-
|
113
|
-
|
111
|
+
when (text = @ss.scan(/[\s]*,[\s]*/))
|
112
|
+
action { [:COMMA, text] }
|
114
113
|
|
115
|
-
|
116
|
-
|
114
|
+
when (text = @ss.scan(/[\s]*~[\s]*/))
|
115
|
+
action { [:TILDE, text] }
|
117
116
|
|
118
|
-
|
119
|
-
|
117
|
+
when (text = @ss.scan(/\:not\([\s]*/))
|
118
|
+
action { [:NOT, text] }
|
120
119
|
|
121
|
-
|
122
|
-
|
120
|
+
when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
|
121
|
+
action { [:NUMBER, text] }
|
123
122
|
|
124
|
-
|
125
|
-
|
123
|
+
when (text = @ss.scan(/[\s]*\/\/[\s]*/))
|
124
|
+
action { [:DOUBLESLASH, text] }
|
126
125
|
|
127
|
-
|
128
|
-
|
126
|
+
when (text = @ss.scan(/[\s]*\/[\s]*/))
|
127
|
+
action { [:SLASH, text] }
|
129
128
|
|
130
|
-
|
131
|
-
|
129
|
+
when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
|
130
|
+
action {[:UNICODE_RANGE, text] }
|
132
131
|
|
133
|
-
|
134
|
-
|
132
|
+
when (text = @ss.scan(/[\s]+/))
|
133
|
+
action { [:S, text] }
|
135
134
|
|
136
|
-
|
137
|
-
|
135
|
+
when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
|
136
|
+
action { [:STRING, text] }
|
138
137
|
|
139
|
-
|
140
|
-
|
141
|
-
raise ScanError, "can not match: '" + text + "'"
|
142
|
-
end # if
|
138
|
+
when (text = @ss.scan(/./))
|
139
|
+
action { [text, text] }
|
143
140
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
141
|
+
|
142
|
+
else
|
143
|
+
text = @ss.string[@ss.pos .. -1]
|
144
|
+
raise ScanError, "can not match: '" + text + "'"
|
145
|
+
end # if
|
146
|
+
|
147
|
+
else
|
148
|
+
raise ScanError, "undefined state: '" + state.to_s + "'"
|
149
|
+
end # case state
|
150
|
+
token
|
151
|
+
end # def _next_token
|
149
152
|
|
150
153
|
end # class
|
151
154
|
end
|