nokogiri 1.8.5 → 1.13.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -21
- data/LICENSE-DEPENDENCIES.md +1159 -868
- data/LICENSE.md +5 -28
- data/README.md +196 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -59
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +750 -420
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +41 -36
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +291 -216
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +61 -52
- data/ext/nokogiri/xml_node.c +1044 -616
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +226 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +223 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +97 -53
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +57 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +895 -377
- data/lib/nokogiri/xml/node_set.rb +92 -65
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +220 -266
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
data/LICENSE.md
CHANGED
@@ -1,32 +1,9 @@
|
|
1
|
-
|
1
|
+
The MIT License
|
2
2
|
|
3
|
-
Copyright
|
3
|
+
Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney.
|
4
4
|
|
5
|
-
|
6
|
-
* [Mike Dalessio](http://mike.daless.io)
|
7
|
-
* [Charles Nutter](http://blog.headius.com)
|
8
|
-
* [Sergio Arbeo](http://www.serabe.com)
|
9
|
-
* [Patrick Mahoney](http://polycrystal.org)
|
10
|
-
* [Yoko Harada](http://yokolet.blogspot.com)
|
11
|
-
* [Akinori MUSHA](https://akinori.org)
|
12
|
-
* [John Shahid](https://github.com/jvshahid)
|
13
|
-
* [Lars Kanis](https://github.com/larskanis)
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
14
6
|
|
15
|
-
|
16
|
-
a copy of this software and associated documentation files (the
|
17
|
-
'Software'), to deal in the Software without restriction, including
|
18
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
19
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
20
|
-
permit persons to whom the Software is furnished to do so, subject to
|
21
|
-
the following conditions:
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
22
8
|
|
23
|
-
|
24
|
-
included in all copies or substantial portions of the Software.
|
25
|
-
|
26
|
-
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
27
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
28
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
29
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
30
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
31
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
32
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,90 +1,161 @@
|
|
1
|
+
<div><img src="https://nokogiri.org/images/nokogiri-serif-black.png" align="right"/></div>
|
2
|
+
|
1
3
|
# Nokogiri
|
2
4
|
|
3
|
-
|
5
|
+
Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (CRuby) and xerces (JRuby).
|
6
|
+
|
7
|
+
## Guiding Principles
|
8
|
+
|
9
|
+
Some guiding principles Nokogiri tries to follow:
|
10
|
+
|
11
|
+
- be secure-by-default by treating all documents as **untrusted** by default
|
12
|
+
- be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
|
13
|
+
|
14
|
+
|
15
|
+
## Features Overview
|
16
|
+
|
17
|
+
- DOM Parser for XML, HTML4, and HTML5
|
18
|
+
- SAX Parser for XML and HTML4
|
19
|
+
- Push Parser for XML and HTML4
|
20
|
+
- Document search via XPath 1.0
|
21
|
+
- Document search via CSS3 selectors, with some jquery-like extensions
|
22
|
+
- XSD Schema validation
|
23
|
+
- XSLT transformation
|
24
|
+
- "Builder" DSL for XML and HTML documents
|
25
|
+
|
26
|
+
|
27
|
+
## Status
|
28
|
+
|
29
|
+
[![Github Actions CI](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml)
|
30
|
+
[![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main)
|
31
|
+
|
32
|
+
[![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri)
|
33
|
+
[![SemVer compatibility](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores)
|
34
|
+
|
35
|
+
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/5344/badge)](https://bestpractices.coreinfrastructure.org/projects/5344)
|
36
|
+
[![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme)
|
37
|
+
|
38
|
+
|
39
|
+
## Support, Getting Help, and Reporting Issues
|
40
|
+
|
41
|
+
All official documentation is posted at https://nokogiri.org (the source for which is at https://github.com/sparklemotion/nokogiri.org/, and we welcome contributions).
|
42
|
+
|
43
|
+
Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Nokogiri. [Tidelift][tidelift] subscriptions also help the Nokogiri maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
|
44
|
+
|
45
|
+
[tidelift]: https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme
|
46
|
+
|
47
|
+
### Reading
|
48
|
+
|
49
|
+
Your first stops for learning more about Nokogiri should be:
|
50
|
+
|
51
|
+
- [API Documentation](https://nokogiri.org/rdoc/index.html)
|
52
|
+
- [Tutorials](https://nokogiri.org/tutorials/toc.html)
|
53
|
+
- An excellent community-maintained [Cheat Sheet](https://github.com/sparklemotion/nokogiri/wiki/Cheat-sheet)
|
54
|
+
|
4
55
|
|
5
|
-
|
6
|
-
Nokogiri's many features is the ability to search documents via XPath
|
7
|
-
or CSS3 selectors.
|
56
|
+
### Ask For Help
|
8
57
|
|
9
|
-
|
58
|
+
There are a few ways to ask exploratory questions:
|
10
59
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
* [GitHub](https://github.com/sparklemotion/nokogiri)
|
15
|
-
* [Mailing List](https://groups.google.com/group/nokogiri-talk)
|
16
|
-
* [Bug Reports](https://github.com/sparklemotion/nokogiri/issues)
|
17
|
-
* [Chat/Gitter](https://gitter.im/sparklemotion/nokogiri)
|
60
|
+
- The Ruby Discord chat server is active at https://discord.gg/UyQnKrT
|
61
|
+
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
62
|
+
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
18
63
|
|
19
|
-
|
20
|
-
[![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri)
|
21
|
-
[![Version Eye](https://www.versioneye.com/ruby/nokogiri/badge.png)](https://www.versioneye.com/ruby/nokogiri)
|
22
|
-
[![Join the chat at https://gitter.im/sparklemotion/nokogiri](https://badges.gitter.im/sparklemotion/nokogiri.svg)](https://gitter.im/sparklemotion/nokogiri?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
64
|
+
Please do not mail the maintainers at their personal addresses.
|
23
65
|
|
24
66
|
|
25
|
-
|
67
|
+
### Report A Bug
|
26
68
|
|
27
|
-
|
28
|
-
* XML/HTML SAX parser
|
29
|
-
* XML/HTML Push parser
|
30
|
-
* XPath 1.0 support for document searching
|
31
|
-
* CSS3 selector support for document searching
|
32
|
-
* XML/HTML builder
|
33
|
-
* XSLT transformer
|
69
|
+
The Nokogiri bug tracker is at https://github.com/sparklemotion/nokogiri/issues
|
34
70
|
|
35
|
-
|
36
|
-
|
37
|
-
|
71
|
+
Please use the "Bug Report" or "Installation Difficulties" templates.
|
72
|
+
|
73
|
+
|
74
|
+
### Security and Vulnerability Reporting
|
75
|
+
|
76
|
+
Please report vulnerabilities at https://hackerone.com/nokogiri
|
77
|
+
|
78
|
+
Full information and description of our security policy is in [`SECURITY.md`](SECURITY.md)
|
79
|
+
|
80
|
+
|
81
|
+
### Semantic Versioning Policy
|
82
|
+
|
83
|
+
Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores)
|
84
|
+
|
85
|
+
We bump `Major.Minor.Patch` versions following this guidance:
|
86
|
+
|
87
|
+
`Major`: (we've never done this)
|
88
|
+
|
89
|
+
- Significant backwards-incompatible changes to the public API that would require rewriting existing application code.
|
90
|
+
- Some examples of backwards-incompatible changes we might someday consider for a Major release are at [`ROADMAP.md`](ROADMAP.md).
|
91
|
+
|
92
|
+
`Minor`:
|
93
|
+
|
94
|
+
- Features and bugfixes.
|
95
|
+
- Updating packaged libraries for non-security-related reasons.
|
96
|
+
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
97
|
+
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
98
|
+
|
99
|
+
`Patch`:
|
100
|
+
|
101
|
+
- Bugfixes.
|
102
|
+
- Security updates.
|
103
|
+
- Updating packaged libraries for security-related reasons.
|
38
104
|
|
39
105
|
|
40
106
|
## Installation
|
41
107
|
|
42
|
-
|
108
|
+
Requirements:
|
43
109
|
|
44
|
-
|
45
|
-
|
46
|
-
```
|
110
|
+
- Ruby >= 2.6
|
111
|
+
- JRuby >= 9.3.0.0
|
47
112
|
|
48
|
-
then please start troubleshooting here:
|
49
113
|
|
50
|
-
|
114
|
+
### Native Gems: Faster, more reliable installation
|
51
115
|
|
52
|
-
|
53
|
-
installation. The vast majority of them are out of date and therefore
|
54
|
-
incorrect. __Please do not use Stack Overflow.__
|
116
|
+
"Native gems" contain pre-compiled libraries for a specific machine architecture. On supported platforms, this removes the need for compiling the C extension and the packaged libraries, or for system dependencies to exist. This results in **much faster installation** and **more reliable installation**, which as you probably know are the biggest headaches for Nokogiri users.
|
55
117
|
|
56
|
-
|
57
|
-
when the above instructions don't work for you. This allows us to both
|
58
|
-
help you directly and improve the documentation.
|
118
|
+
### Supported Platforms
|
59
119
|
|
120
|
+
Nokogiri ships pre-compiled, "native" gems for the following platforms:
|
60
121
|
|
61
|
-
|
122
|
+
- Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine
|
123
|
+
- Darwin/MacOS: `x86_64-darwin` and `arm64-darwin`
|
124
|
+
- Windows: `x86-mingw32` and `x64-mingw32`
|
125
|
+
- Java: any platform running JRuby 9.3 or higher
|
62
126
|
|
63
|
-
|
127
|
+
To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`.
|
64
128
|
|
65
|
-
|
66
|
-
* SuSE: https://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/
|
67
|
-
* Fedora: http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756
|
129
|
+
If you're on a supported platform, either `gem install` or `bundle install` should install a native gem without any additional action on your part. This installation should only take a few seconds, and your output should look something like:
|
68
130
|
|
131
|
+
``` sh
|
132
|
+
$ gem install nokogiri
|
133
|
+
Fetching nokogiri-1.11.0-x86_64-linux.gem
|
134
|
+
Successfully installed nokogiri-1.11.0-x86_64-linux
|
135
|
+
1 gem installed
|
136
|
+
```
|
69
137
|
|
70
|
-
## Support
|
71
138
|
|
72
|
-
|
139
|
+
### Other Installation Options
|
73
140
|
|
74
|
-
|
75
|
-
* The Nokogiri bug tracker is here: https://github.com/sparklemotion/nokogiri/issues
|
76
|
-
* Before filing a bug report, please read our submission guidelines: http://nokogiri.org/tutorials/getting_help.html
|
77
|
-
* The IRC channel is #nokogiri on freenode.
|
141
|
+
Because Nokogiri is a C extension, it requires that you have a C compiler toolchain, Ruby development header files, and some system dependencies installed.
|
78
142
|
|
143
|
+
The following may work for you if you have an appropriately-configured system:
|
79
144
|
|
80
|
-
|
145
|
+
``` bash
|
146
|
+
gem install nokogiri
|
147
|
+
```
|
81
148
|
|
82
|
-
|
149
|
+
If you have any issues, please visit [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for more complete instructions and troubleshooting.
|
83
150
|
|
84
151
|
|
85
|
-
## Synopsis
|
152
|
+
## How To Use Nokogiri
|
86
153
|
|
87
|
-
Nokogiri is a large library, but here is example usage for parsing and examining a document:
|
154
|
+
Nokogiri is a large library, and so it's challenging to briefly summarize it. We've tried to provide long, real-world examples at [Tutorials](https://nokogiri.org/tutorials/toc.html).
|
155
|
+
|
156
|
+
### Parsing and Querying
|
157
|
+
|
158
|
+
Here is example usage for parsing and querying a document:
|
88
159
|
|
89
160
|
```ruby
|
90
161
|
#! /usr/bin/env ruby
|
@@ -93,51 +164,26 @@ require 'nokogiri'
|
|
93
164
|
require 'open-uri'
|
94
165
|
|
95
166
|
# Fetch and parse HTML document
|
96
|
-
doc = Nokogiri::HTML(open('http://www.nokogiri.org/tutorials/installing_nokogiri.html'))
|
167
|
+
doc = Nokogiri::HTML(URI.open('https://nokogiri.org/tutorials/installing_nokogiri.html'))
|
97
168
|
|
98
|
-
|
169
|
+
# Search for nodes by css
|
99
170
|
doc.css('nav ul.menu li a', 'article h2').each do |link|
|
100
171
|
puts link.content
|
101
172
|
end
|
102
173
|
|
103
|
-
|
174
|
+
# Search for nodes by xpath
|
104
175
|
doc.xpath('//nav//ul//li/a', '//article//h2').each do |link|
|
105
176
|
puts link.content
|
106
177
|
end
|
107
178
|
|
108
|
-
|
179
|
+
# Or mix and match
|
109
180
|
doc.search('nav ul.menu li a', '//article//h2').each do |link|
|
110
181
|
puts link.content
|
111
182
|
end
|
112
183
|
```
|
113
184
|
|
114
185
|
|
115
|
-
|
116
|
-
|
117
|
-
* Ruby 2.1.0 or higher, including any development packages necessary
|
118
|
-
to compile native extensions.
|
119
|
-
|
120
|
-
* In Nokogiri 1.6.0 and later libxml2 and libxslt are bundled with the
|
121
|
-
gem, but if you want to use the system versions:
|
122
|
-
|
123
|
-
* First, check out [the long list](http://www.xmlsoft.org/news.html)
|
124
|
-
of fixes and changes between releases before deciding to use any
|
125
|
-
version older than is bundled with Nokogiri.
|
126
|
-
|
127
|
-
* At install time, set the environment variable
|
128
|
-
`NOKOGIRI_USE_SYSTEM_LIBRARIES` or else use the
|
129
|
-
`--use-system-libraries` argument. (See
|
130
|
-
http://nokogiri.org/tutorials/installing_nokogiri.html#using_your_system_libraries
|
131
|
-
for specifics.)
|
132
|
-
|
133
|
-
* libxml2 >=2.6.21 with iconv support
|
134
|
-
(libxml2-dev/-devel is also required)
|
135
|
-
|
136
|
-
* libxslt, built with and supported by the given libxml2
|
137
|
-
(libxslt-dev/-devel is also required)
|
138
|
-
|
139
|
-
|
140
|
-
## Encoding
|
186
|
+
### Encoding
|
141
187
|
|
142
188
|
Strings are always stored as UTF-8 internally. Methods that return
|
143
189
|
text values will always return UTF-8 encoded strings. Methods that
|
@@ -162,13 +208,73 @@ explicitly setting the encoding to EUC-JP on the parser:
|
|
162
208
|
doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
|
163
209
|
```
|
164
210
|
|
165
|
-
## Development
|
166
211
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
212
|
+
## Technical Overview
|
213
|
+
|
214
|
+
### Guiding Principles
|
215
|
+
|
216
|
+
As noted above, two guiding principles of the software are:
|
217
|
+
|
218
|
+
- be secure-by-default by treating all documents as **untrusted** by default
|
219
|
+
- be a **thin-as-reasonable layer** on top of the underlying parsers, and don't attempt to fix behavioral differences between the parsers
|
220
|
+
|
221
|
+
Notably, despite all parsers being standards-compliant, there are behavioral inconsistencies between the parsers used in the CRuby and JRuby implementations, and Nokogiri does not and should not attempt to remove these inconsistencies. Instead, we surface these differences in the test suite when they are important/semantic; or we intentionally write tests to depend only on the important/semantic bits (omitting whitespace from regex matchers on results, for example).
|
222
|
+
|
223
|
+
|
224
|
+
### CRuby
|
225
|
+
|
226
|
+
The Ruby (a.k.a., CRuby, MRI, YARV) implementation is a C extension that depends on libxml2 and libxslt (which in turn depend on zlib and possibly libiconv).
|
227
|
+
|
228
|
+
These dependencies are met by default by Nokogiri's packaged versions of the libxml2 and libxslt source code, but a configuration option `--use-system-libraries` is provided to allow specification of alternative library locations. See [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html) for full documentation.
|
229
|
+
|
230
|
+
We provide native gems by pre-compiling libxml2 and libxslt (and potentially zlib and libiconv) and packaging them into the gem file. In this case, no compilation is necessary at installation time, which leads to faster and more reliable installation.
|
231
|
+
|
232
|
+
See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
|
233
|
+
|
234
|
+
|
235
|
+
### JRuby
|
236
|
+
|
237
|
+
The Java (a.k.a. JRuby) implementation is a Java extension that depends primarily on Xerces and NekoHTML for parsing, though additional dependencies are on `isorelax`, `nekodtd`, `jing`, `serializer`, `xalan-j`, and `xml-apis`.
|
238
|
+
|
239
|
+
These dependencies are provided by pre-compiled jar files packaged in the `java` platform gem.
|
240
|
+
|
241
|
+
See [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for more information on which dependencies are provided in which native and source gems.
|
242
|
+
|
243
|
+
|
244
|
+
## Contributing
|
245
|
+
|
246
|
+
See [`CONTRIBUTING.md`](CONTRIBUTING.md) for an intro guide to developing Nokogiri.
|
247
|
+
|
248
|
+
|
249
|
+
## Code of Conduct
|
250
|
+
|
251
|
+
We've adopted the Contributor Covenant code of conduct, which you can read in full in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md).
|
252
|
+
|
171
253
|
|
172
254
|
## License
|
173
255
|
|
174
|
-
MIT
|
256
|
+
This project is licensed under the terms of the MIT license.
|
257
|
+
|
258
|
+
See this license at [`LICENSE.md`](LICENSE.md).
|
259
|
+
|
260
|
+
|
261
|
+
### Dependencies
|
262
|
+
|
263
|
+
Some additional libraries may be distributed with your version of Nokogiri. Please see [`LICENSE-DEPENDENCIES.md`](LICENSE-DEPENDENCIES.md) for a discussion of the variations as well as the licenses thereof.
|
264
|
+
|
265
|
+
|
266
|
+
## Authors
|
267
|
+
|
268
|
+
- Mike Dalessio
|
269
|
+
- Aaron Patterson
|
270
|
+
- Yoko Harada
|
271
|
+
- Akinori MUSHA
|
272
|
+
- John Shahid
|
273
|
+
- Karol Bucek
|
274
|
+
- Sam Ruby
|
275
|
+
- Craig Barnes
|
276
|
+
- Stephen Checkoway
|
277
|
+
- Lars Kanis
|
278
|
+
- Sergio Arbeo
|
279
|
+
- Timothy Elliott
|
280
|
+
- Nobuyoshi Nakada
|
data/bin/nokogiri
CHANGED
@@ -1,61 +1,77 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "optparse"
|
5
|
+
require "open-uri"
|
6
|
+
require "uri"
|
7
|
+
require "rubygems"
|
8
|
+
require "nokogiri"
|
9
|
+
autoload :IRB, "irb"
|
8
10
|
|
9
11
|
parse_class = Nokogiri
|
10
12
|
encoding = nil
|
11
13
|
|
12
14
|
# This module provides some tunables with the nokogiri CLI for use in
|
13
15
|
# your ~/.nokogirirc.
|
14
|
-
module Nokogiri
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
16
|
+
module Nokogiri
|
17
|
+
module CLI
|
18
|
+
class << self
|
19
|
+
# Specify the console engine, defaulted to IRB.
|
20
|
+
#
|
21
|
+
# call-seq:
|
22
|
+
# require 'pry'
|
23
|
+
# Nokogiri::CLI.console = Pry
|
24
|
+
attr_writer :console
|
25
|
+
|
26
|
+
def console
|
27
|
+
case @console
|
28
|
+
when Symbol
|
29
|
+
Kernel.const_get(@console)
|
30
|
+
else
|
31
|
+
@console
|
32
|
+
end
|
29
33
|
end
|
34
|
+
|
35
|
+
attr_accessor :rcfile
|
30
36
|
end
|
31
37
|
|
32
|
-
|
38
|
+
self.rcfile = File.expand_path("~/.nokogirirc")
|
39
|
+
self.console = :IRB
|
33
40
|
end
|
41
|
+
end
|
34
42
|
|
35
|
-
|
36
|
-
|
43
|
+
def safe_read(uri_or_path)
|
44
|
+
uri = URI.parse(uri_or_path)
|
45
|
+
case uri
|
46
|
+
when URI::HTTP
|
47
|
+
uri.read
|
48
|
+
when URI::File
|
49
|
+
File.read(uri.path)
|
50
|
+
else
|
51
|
+
File.read(uri_or_path)
|
52
|
+
end
|
37
53
|
end
|
38
54
|
|
39
55
|
opts = OptionParser.new do |opts|
|
40
56
|
opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
|
41
|
-
opts.define_head
|
42
|
-
opts.separator
|
43
|
-
opts.separator
|
44
|
-
opts.separator
|
45
|
-
opts.separator
|
46
|
-
opts.separator
|
47
|
-
opts.separator
|
48
|
-
opts.separator
|
57
|
+
opts.define_head("Usage: nokogiri <uri|path> [options]")
|
58
|
+
opts.separator("")
|
59
|
+
opts.separator("Examples:")
|
60
|
+
opts.separator(" nokogiri https://www.ruby-lang.org/")
|
61
|
+
opts.separator(" nokogiri ./public/index.html")
|
62
|
+
opts.separator(" curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'")
|
63
|
+
opts.separator("")
|
64
|
+
opts.separator("Options:")
|
49
65
|
|
50
66
|
opts.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v|
|
51
|
-
parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v]
|
67
|
+
parse_class = { xml: Nokogiri::XML, html: Nokogiri::HTML }[v]
|
52
68
|
end
|
53
69
|
|
54
70
|
opts.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v|
|
55
71
|
Nokogiri::CLI.rcfile = v
|
56
72
|
end
|
57
73
|
|
58
|
-
opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || 'none'})") do |v|
|
74
|
+
opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || "none"})") do |v|
|
59
75
|
encoding = v
|
60
76
|
end
|
61
77
|
|
@@ -64,7 +80,7 @@ opts = OptionParser.new do |opts|
|
|
64
80
|
end
|
65
81
|
|
66
82
|
opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
|
67
|
-
@rng =
|
83
|
+
@rng = Nokogiri::XML::RelaxNG(safe_read(v))
|
68
84
|
end
|
69
85
|
|
70
86
|
opts.on_tail("-?", "--help", "Show this message") do
|
@@ -90,15 +106,10 @@ if File.file?(Nokogiri::CLI.rcfile)
|
|
90
106
|
load Nokogiri::CLI.rcfile
|
91
107
|
end
|
92
108
|
|
93
|
-
if url || $stdin.tty?
|
94
|
-
|
95
|
-
when URI::HTTP
|
96
|
-
@doc = parse_class.parse(uri.read, url, encoding)
|
97
|
-
else
|
98
|
-
@doc = parse_class.parse(open(url).read, nil, encoding)
|
99
|
-
end
|
109
|
+
@doc = if url || $stdin.tty?
|
110
|
+
parse_class.parse(safe_read(url), url, encoding)
|
100
111
|
else
|
101
|
-
|
112
|
+
parse_class.parse($stdin, nil, encoding)
|
102
113
|
end
|
103
114
|
|
104
115
|
$_ = @doc
|
@@ -107,12 +118,14 @@ if @rng
|
|
107
118
|
@rng.validate(@doc).each do |error|
|
108
119
|
puts error.message
|
109
120
|
end
|
110
|
-
|
111
|
-
|
112
|
-
eval
|
113
|
-
|
114
|
-
|
115
|
-
|
121
|
+
elsif @script
|
122
|
+
begin
|
123
|
+
eval(@script, binding, "<main>") # rubocop:disable Security/Eval
|
124
|
+
rescue Exception => e # rubocop:disable Lint/RescueException
|
125
|
+
warn("ERROR: Exception raised while evaluating '#{@script}'")
|
126
|
+
raise e
|
116
127
|
end
|
128
|
+
else
|
129
|
+
puts "Your document is stored in @doc..."
|
130
|
+
Nokogiri::CLI.console.start
|
117
131
|
end
|
118
|
-
|
data/dependencies.yml
CHANGED
@@ -1,69 +1,23 @@
|
|
1
1
|
libxml2:
|
2
|
-
version: "2.9.8"
|
3
|
-
sha256: "
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# $ gpg --verify libxml2-2.9.8.tar.gz.asc ./ports/archives/libxml2-2.9.8.tar.gz
|
7
|
-
# gpg: Signature made Mon 05 Mar 2018 11:07:45 AM EST using RSA key ID 596BEA5D
|
8
|
-
# gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>"
|
9
|
-
# gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>"
|
10
|
-
# gpg: WARNING: This key is not certified with a trusted signature!
|
11
|
-
# gpg: There is no indication that the signature belongs to the owner.
|
12
|
-
# Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F
|
13
|
-
# Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D
|
14
|
-
#
|
15
|
-
# using this pgp signature:
|
16
|
-
# -----BEGIN PGP SIGNATURE-----
|
17
|
-
#
|
18
|
-
# iQEcBAABAgAGBQJanWtRAAoJEBVYiyZZa+pdV7oIAJWdFahwt+reN/Zt2RPmjjcr
|
19
|
-
# eSsY7UV1RXjScnNjTzJT1h2hJ7SnUjCkqjR6VdtKDUIzpuX+S2U83joafJH6mxUb
|
20
|
-
# yw2nO4RfjYTPxpz5JkvqT7jmgEIaD81BuwcMehqpMpIfiKa2NgO1DSfZxgs8a9E2
|
21
|
-
# +ehc/kZWuI5gmNGrd84EEWUqpYW/Xx7jy02osioJuU5IMPjzZKNR3maXp9oAKeBc
|
22
|
-
# S2QNa1ID/pUk3K3M/5nlwNgAtQ7lxQrqhrSma2dsKt/IpL6VXomxuD4Bh1r2MZhX
|
23
|
-
# uZ456X/xJN8UmPewLZWGBU1MK9wqu3Zx5Qwz64H6UdlYIzXZ2jXj2YWZa6xkxPA=
|
24
|
-
# =69xn
|
25
|
-
# -----END PGP SIGNATURE-----
|
26
|
-
#
|
2
|
+
version: "2.9.14"
|
3
|
+
sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee"
|
4
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum
|
27
5
|
|
28
6
|
libxslt:
|
29
|
-
version: "1.1.32"
|
30
|
-
sha256: "
|
31
|
-
#
|
32
|
-
#
|
33
|
-
# $ gpg --verify libxslt-1.1.32.tar.gz.asc libxslt-1.1.32.tar.gz
|
34
|
-
# gpg: Signature made Thu 02 Nov 2017 04:35:04 PM EDT using RSA key ID 596BEA5D
|
35
|
-
# gpg: Good signature from "Daniel Veillard (Red Hat work email) <veillard@redhat.com>"
|
36
|
-
# gpg: aka "Daniel Veillard <Daniel.Veillard@w3.org>"
|
37
|
-
# gpg: WARNING: This key is not certified with a trusted signature!
|
38
|
-
# gpg: There is no indication that the signature belongs to the owner.
|
39
|
-
# Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F
|
40
|
-
# Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D
|
41
|
-
#
|
42
|
-
# using this pgp signature:
|
43
|
-
#
|
44
|
-
# -----BEGIN PGP SIGNATURE-----
|
45
|
-
#
|
46
|
-
# iQEcBAABAgAGBQJZ+4F4AAoJEBVYiyZZa+pdy1IIAMX1DpzYGdnv6GCPSKeZ0woD
|
47
|
-
# sHmSkygJep0/sUQD1cYunNsNZnGDgWhnsLAvHOn3opJgsiaZhmhJ8Uo7QNlT+ni1
|
48
|
-
# AvRFgQoSXLWSF5kkun4u7RvnpDI6jYfCuYSwb9SO4EAYFAQQJXQaKCeFq71gad+p
|
49
|
-
# XGHJFAy2TqUVLNZ5I1mQz/oBeDsJ7RzHpYqaBxsLDqrCzRQ9ai23q+dFGS3jvLBr
|
50
|
-
# 0gXw0MK73ceOwW12L5aLj4erNbATWmMFMDYZZwftysv3bgx2YfiOoZUTzufrB/Bc
|
51
|
-
# MG8hP76aYBwIKNbhiDFGa2qdHGZGF7YQ4mi1/ZDX1K1G2tKKeEYxscM13JwiGb8=
|
52
|
-
# =NuQO
|
53
|
-
# -----END PGP SIGNATURE-----
|
54
|
-
#
|
7
|
+
version: "1.1.35"
|
8
|
+
sha256: "8247f33e9a872c6ac859aa45018bc4c4d00b97e2feac9eebc10c93ce1f34dd79"
|
9
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.35.sha256sum
|
55
10
|
|
56
11
|
zlib:
|
57
|
-
version: "1.2.
|
58
|
-
sha256: "
|
12
|
+
version: "1.2.12"
|
13
|
+
sha256: "91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9"
|
59
14
|
# SHA-256 hash provided on http://zlib.net/
|
60
15
|
|
61
16
|
libiconv:
|
62
|
-
version: "1.
|
63
|
-
sha256: "
|
64
|
-
# gpg: Signature made Fri
|
17
|
+
version: "1.16"
|
18
|
+
sha256: "e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04"
|
19
|
+
# gpg: Signature made Fri 26 Apr 2019 03:36:38 PM EDT
|
65
20
|
# gpg: using RSA key 4F494A942E4616C2
|
66
|
-
# gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [
|
67
|
-
# gpg:
|
68
|
-
# gpg: There is no indication that the signature belongs to the owner.
|
21
|
+
# gpg: Good signature from "Bruno Haible (Open Source Development) <bruno@clisp.org>" [expired]
|
22
|
+
# gpg: Note: This key has expired!
|
69
23
|
# Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2
|