nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
@@ -1,24 +1,37 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
|
+
####
|
4
|
+
# A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
|
5
|
+
# a NodeSet is returned as a result of searching a Document via
|
6
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
|
3
7
|
class NodeSet
|
8
|
+
include Nokogiri::XML::Searchable
|
4
9
|
include Enumerable
|
5
10
|
|
11
|
+
# The Document this NodeSet is associated with
|
6
12
|
attr_accessor :document
|
7
13
|
|
8
|
-
|
14
|
+
# Create a NodeSet associated with +document+, populated with the nodes in +list+ (empty by default)
|
15
|
+
def initialize document, list = []
|
16
|
+
@document = document
|
17
|
+
document.decorate(self)
|
18
|
+
list.each { |x| self << x }
|
9
19
|
yield self if block_given?
|
10
20
|
end
|
11
21
|
|
12
22
|
###
|
13
23
|
# Get the first element of the NodeSet.
|
14
|
-
def first
|
15
|
-
self[0]
|
24
|
+
def first n = nil
|
25
|
+
return self[0] unless n
|
26
|
+
list = []
|
27
|
+
n.times { |i| list << self[i] }
|
28
|
+
list
|
16
29
|
end
|
17
30
|
|
18
31
|
###
|
19
32
|
# Get the last element of the NodeSet.
|
20
33
|
def last
|
21
|
-
self[
|
34
|
+
self[-1]
|
22
35
|
end
|
23
36
|
|
24
37
|
###
|
@@ -27,6 +40,13 @@ module Nokogiri
|
|
27
40
|
length == 0
|
28
41
|
end
|
29
42
|
|
43
|
+
###
|
44
|
+
# Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
|
45
|
+
def index(node)
|
46
|
+
each_with_index { |member, j| return j if member == node }
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
|
30
50
|
###
|
31
51
|
# Insert +datum+ before the first Node in this NodeSet
|
32
52
|
def before datum
|
@@ -39,68 +59,98 @@ module Nokogiri
|
|
39
59
|
last.after datum
|
40
60
|
end
|
41
61
|
|
62
|
+
alias :<< :push
|
63
|
+
alias :remove :unlink
|
64
|
+
|
42
65
|
###
|
43
|
-
#
|
44
|
-
|
45
|
-
|
66
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
67
|
+
#
|
68
|
+
# Search this node set for CSS +rules+. +rules+ must be one or more CSS
|
69
|
+
# selectors. For example:
|
70
|
+
#
|
71
|
+
# For more information see Nokogiri::XML::Searchable#css
|
72
|
+
def css *args
|
73
|
+
rules, handler, ns, _ = extract_params(args)
|
74
|
+
|
75
|
+
inject(NodeSet.new(document)) do |set, node|
|
76
|
+
set += css_internal node, rules, handler, ns
|
77
|
+
end
|
46
78
|
end
|
47
79
|
|
48
80
|
###
|
49
|
-
#
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
81
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
82
|
+
#
|
83
|
+
# Search this node set for XPath +paths+. +paths+ must be one or more XPath
|
84
|
+
# queries.
|
85
|
+
#
|
86
|
+
# For more information see Nokogiri::XML::Searchable#xpath
|
87
|
+
def xpath *args
|
88
|
+
paths, handler, ns, binds = extract_params(args)
|
89
|
+
|
90
|
+
inject(NodeSet.new(document)) do |set, node|
|
91
|
+
set += node.xpath(*(paths + [ns, handler, binds].compact))
|
92
|
+
end
|
55
93
|
end
|
56
|
-
alias :remove :unlink
|
57
94
|
|
58
95
|
###
|
59
|
-
# Search this
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
96
|
+
# Search this NodeSet's nodes' immediate children using CSS selector +selector+
|
97
|
+
def > selector
|
98
|
+
ns = document.root.namespaces
|
99
|
+
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
100
|
+
end
|
101
|
+
|
102
|
+
###
|
103
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
104
|
+
#
|
105
|
+
# Search this object for +paths+, and return only the first
|
106
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
107
|
+
#
|
108
|
+
# See Searchable#search for more information.
|
109
|
+
#
|
110
|
+
# Or, if passed an integer, index into the NodeSet:
|
111
|
+
#
|
112
|
+
# node_set.at(3) # same as node_set[3]
|
113
|
+
#
|
114
|
+
def at *args
|
115
|
+
if args.length == 1 && args.first.is_a?(Numeric)
|
116
|
+
return self[args.first]
|
67
117
|
end
|
68
|
-
|
69
|
-
|
118
|
+
|
119
|
+
super(*args)
|
70
120
|
end
|
71
|
-
alias
|
72
|
-
alias :xpath :search
|
73
|
-
alias :css :search
|
121
|
+
alias :% :at
|
74
122
|
|
75
123
|
###
|
76
|
-
#
|
77
|
-
|
78
|
-
|
79
|
-
return self[path] if path.is_a?(Numeric)
|
80
|
-
search(path, ns).first
|
124
|
+
# Filter this list for nodes that match +expr+
|
125
|
+
def filter expr
|
126
|
+
find_all { |node| node.matches?(expr) }
|
81
127
|
end
|
82
128
|
|
83
129
|
###
|
84
130
|
# Append the class attribute +name+ to all Node objects in the NodeSet.
|
85
131
|
def add_class name
|
86
132
|
each do |el|
|
87
|
-
|
88
|
-
|
89
|
-
el.set_attribute('class', classes.push(name).uniq.join(" "))
|
133
|
+
classes = el['class'].to_s.split(/\s+/)
|
134
|
+
el['class'] = classes.push(name).uniq.join " "
|
90
135
|
end
|
91
136
|
self
|
92
137
|
end
|
93
138
|
|
94
139
|
###
|
95
140
|
# Remove the class attribute +name+ from all Node objects in the NodeSet.
|
141
|
+
# If +name+ is nil, remove the class attribute from all Nodes in the
|
142
|
+
# NodeSet.
|
96
143
|
def remove_class name = nil
|
97
144
|
each do |el|
|
98
|
-
next unless el.respond_to? :get_attribute
|
99
145
|
if name
|
100
|
-
classes = el
|
101
|
-
|
146
|
+
classes = el['class'].to_s.split(/\s+/)
|
147
|
+
if classes.empty?
|
148
|
+
el.delete 'class'
|
149
|
+
else
|
150
|
+
el['class'] = (classes - [name]).uniq.join " "
|
151
|
+
end
|
102
152
|
else
|
103
|
-
el.
|
153
|
+
el.delete "class"
|
104
154
|
end
|
105
155
|
end
|
106
156
|
self
|
@@ -110,73 +160,151 @@ module Nokogiri
|
|
110
160
|
# Set the attribute +key+ to +value+ or the return value of +blk+
|
111
161
|
# on all Node objects in the NodeSet.
|
112
162
|
def attr key, value = nil, &blk
|
113
|
-
|
114
|
-
|
115
|
-
el.set_attribute(key, value || blk[el])
|
116
|
-
end
|
117
|
-
return self
|
118
|
-
end
|
119
|
-
if key.is_a? Hash
|
120
|
-
key.each { |k,v| self.attr(k,v) }
|
121
|
-
return self
|
122
|
-
else
|
123
|
-
return self[0].get_attribute(key)
|
163
|
+
unless Hash === key || key && (value || blk)
|
164
|
+
return first.attribute(key)
|
124
165
|
end
|
166
|
+
|
167
|
+
hash = key.is_a?(Hash) ? key : { key => value }
|
168
|
+
|
169
|
+
hash.each { |k,v| each { |el| el[k] = v || blk[el] } }
|
170
|
+
|
171
|
+
self
|
125
172
|
end
|
126
|
-
|
173
|
+
alias :set :attr
|
174
|
+
alias :attribute :attr
|
127
175
|
|
128
176
|
###
|
129
177
|
# Remove the attribute named +name+ from all Node objects in the NodeSet
|
130
178
|
def remove_attr name
|
131
|
-
each
|
132
|
-
|
133
|
-
el.remove_attribute(name)
|
134
|
-
end
|
135
|
-
self
|
179
|
+
each { |el| el.delete name }
|
180
|
+
self
|
136
181
|
end
|
137
182
|
|
138
183
|
###
|
139
184
|
# Iterate over each node, yielding to +block+
|
140
185
|
def each(&block)
|
141
|
-
|
142
|
-
while x < length
|
186
|
+
0.upto(length - 1) do |x|
|
143
187
|
yield self[x]
|
144
|
-
x += 1
|
145
188
|
end
|
146
189
|
end
|
147
190
|
|
148
191
|
###
|
149
192
|
# Get the inner text of all contained Node objects
|
150
193
|
def inner_text
|
151
|
-
collect
|
194
|
+
collect(&:inner_text).join('')
|
152
195
|
end
|
153
196
|
alias :text :inner_text
|
154
197
|
|
198
|
+
###
|
199
|
+
# Get the inner html of all contained Node objects
|
200
|
+
def inner_html *args
|
201
|
+
collect{|j| j.inner_html(*args) }.join('')
|
202
|
+
end
|
203
|
+
|
155
204
|
###
|
156
205
|
# Wrap this NodeSet with +html+ or the results of the builder in +blk+
|
157
206
|
def wrap(html, &blk)
|
158
207
|
each do |j|
|
159
|
-
new_parent =
|
160
|
-
j.
|
161
|
-
|
162
|
-
if nest.child
|
163
|
-
nest = nest.child until nest.child.nil?
|
164
|
-
end
|
165
|
-
j.parent = nest
|
208
|
+
new_parent = document.parse(html).first
|
209
|
+
j.add_next_sibling(new_parent)
|
210
|
+
new_parent.add_child(j)
|
166
211
|
end
|
167
212
|
self
|
168
213
|
end
|
169
214
|
|
215
|
+
###
|
216
|
+
# Convert this NodeSet to a string.
|
170
217
|
def to_s
|
171
|
-
map
|
218
|
+
map(&:to_s).join
|
172
219
|
end
|
173
220
|
|
174
|
-
|
175
|
-
|
221
|
+
###
|
222
|
+
# Convert this NodeSet to HTML
|
223
|
+
def to_html *args
|
224
|
+
if Nokogiri.jruby?
|
225
|
+
options = args.first.is_a?(Hash) ? args.shift : {}
|
226
|
+
if !options[:save_with]
|
227
|
+
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
228
|
+
end
|
229
|
+
args.insert(0, options)
|
230
|
+
end
|
231
|
+
map { |x| x.to_html(*args) }.join
|
232
|
+
end
|
233
|
+
|
234
|
+
###
|
235
|
+
# Convert this NodeSet to XHTML
|
236
|
+
def to_xhtml *args
|
237
|
+
map { |x| x.to_xhtml(*args) }.join
|
238
|
+
end
|
239
|
+
|
240
|
+
###
|
241
|
+
# Convert this NodeSet to XML
|
242
|
+
def to_xml *args
|
243
|
+
map { |x| x.to_xml(*args) }.join
|
244
|
+
end
|
245
|
+
|
246
|
+
alias :size :length
|
247
|
+
alias :to_ary :to_a
|
248
|
+
|
249
|
+
###
|
250
|
+
# Removes the last element from set and returns it, or +nil+ if
|
251
|
+
# the set is empty
|
252
|
+
def pop
|
253
|
+
return nil if length == 0
|
254
|
+
delete last
|
255
|
+
end
|
256
|
+
|
257
|
+
###
|
258
|
+
# Returns the first element of the NodeSet and removes it. Returns
|
259
|
+
# +nil+ if the set is empty.
|
260
|
+
def shift
|
261
|
+
return nil if length == 0
|
262
|
+
delete first
|
263
|
+
end
|
264
|
+
|
265
|
+
###
|
266
|
+
# Equality -- Two NodeSets are equal if they contain the same number
|
267
|
+
# of elements and if each element is equal to the corresponding
|
268
|
+
# element in the other NodeSet
|
269
|
+
def == other
|
270
|
+
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
271
|
+
return false unless length == other.length
|
272
|
+
each_with_index do |node, i|
|
273
|
+
return false unless node == other[i]
|
274
|
+
end
|
275
|
+
true
|
276
|
+
end
|
277
|
+
|
278
|
+
###
|
279
|
+
# Returns a new NodeSet containing all the children of all the nodes in
|
280
|
+
# the NodeSet
|
281
|
+
def children
|
282
|
+
inject(NodeSet.new(document)) { |set, node| set += node.children }
|
176
283
|
end
|
177
284
|
|
178
|
-
|
179
|
-
|
285
|
+
###
|
286
|
+
# Returns a new NodeSet containing all the nodes in the NodeSet
|
287
|
+
# in reverse order
|
288
|
+
def reverse
|
289
|
+
node_set = NodeSet.new(document)
|
290
|
+
(length - 1).downto(0) do |x|
|
291
|
+
node_set.push self[x]
|
292
|
+
end
|
293
|
+
node_set
|
294
|
+
end
|
295
|
+
|
296
|
+
###
|
297
|
+
# Return a nicely formatted string representation
|
298
|
+
def inspect
|
299
|
+
"[#{map(&:inspect).join ', '}]"
|
300
|
+
end
|
301
|
+
|
302
|
+
alias :+ :|
|
303
|
+
|
304
|
+
private
|
305
|
+
|
306
|
+
def implied_xpath_contexts # :nodoc:
|
307
|
+
[".//", "self::"]
|
180
308
|
end
|
181
309
|
end
|
182
310
|
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# Parse options for passing to Nokogiri.XML or Nokogiri.HTML
|
5
|
+
#
|
6
|
+
# == Building combinations of parse options
|
7
|
+
# You can build your own combinations of these parse options by using any of the following methods:
|
8
|
+
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
|
9
|
+
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
|
10
|
+
# <code>Nokogiri.XML('<content>Chapter 1</content>', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))</code>
|
11
|
+
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
|
12
|
+
# <code>Nokogiri.XML('<content>Chapter 1</content>', options: Nokogiri::XML::ParseOptions.new.recover.noent)</code>
|
13
|
+
# [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
|
14
|
+
# <code>Nokogiri.XML('<content>Chapter 1</content>') {|config| config.recover.noent}</code>
|
15
|
+
#
|
16
|
+
# == Removing particular parse options
|
17
|
+
# You can also remove options from an instance of +ParseOptions+ dynamically.
|
18
|
+
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
|
19
|
+
# Note that this is not available for +STRICT+.
|
20
|
+
#
|
21
|
+
# # Setting the RECOVER & NOENT options...
|
22
|
+
# options = Nokogiri::XML::ParseOptions.new.recover.noent
|
23
|
+
# # later...
|
24
|
+
# options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
|
25
|
+
# options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
|
26
|
+
#
|
27
|
+
class ParseOptions
|
28
|
+
# Strict parsing
|
29
|
+
STRICT = 0
|
30
|
+
# Recover from errors
|
31
|
+
RECOVER = 1 << 0
|
32
|
+
# Substitute entities
|
33
|
+
NOENT = 1 << 1
|
34
|
+
# Load external subsets
|
35
|
+
DTDLOAD = 1 << 2
|
36
|
+
# Default DTD attributes
|
37
|
+
DTDATTR = 1 << 3
|
38
|
+
# validate with the DTD
|
39
|
+
DTDVALID = 1 << 4
|
40
|
+
# suppress error reports
|
41
|
+
NOERROR = 1 << 5
|
42
|
+
# suppress warning reports
|
43
|
+
NOWARNING = 1 << 6
|
44
|
+
# pedantic error reporting
|
45
|
+
PEDANTIC = 1 << 7
|
46
|
+
# remove blank nodes
|
47
|
+
NOBLANKS = 1 << 8
|
48
|
+
# use the SAX1 interface internally
|
49
|
+
SAX1 = 1 << 9
|
50
|
+
# Implement XInclude substitution
|
51
|
+
XINCLUDE = 1 << 10
|
52
|
+
# Forbid network access. Recommended for dealing with untrusted documents.
|
53
|
+
NONET = 1 << 11
|
54
|
+
# Do not reuse the context dictionary
|
55
|
+
NODICT = 1 << 12
|
56
|
+
# remove redundant namespaces declarations
|
57
|
+
NSCLEAN = 1 << 13
|
58
|
+
# merge CDATA as text nodes
|
59
|
+
NOCDATA = 1 << 14
|
60
|
+
# do not generate XINCLUDE START/END nodes
|
61
|
+
NOXINCNODE = 1 << 15
|
62
|
+
# compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
|
63
|
+
COMPACT = 1 << 16
|
64
|
+
# parse using XML-1.0 before update 5
|
65
|
+
OLD10 = 1 << 17
|
66
|
+
# do not fixup XINCLUDE xml:base uris
|
67
|
+
NOBASEFIX = 1 << 18
|
68
|
+
# relax any hardcoded limit from the parser
|
69
|
+
HUGE = 1 << 19
|
70
|
+
|
71
|
+
# the default options used for parsing XML documents
|
72
|
+
DEFAULT_XML = RECOVER | NONET
|
73
|
+
# the default options used for parsing HTML documents
|
74
|
+
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
75
|
+
|
76
|
+
attr_accessor :options
|
77
|
+
def initialize options = STRICT
|
78
|
+
@options = options
|
79
|
+
end
|
80
|
+
|
81
|
+
constants.each do |constant|
|
82
|
+
next if constant.to_sym == :STRICT
|
83
|
+
class_eval %{
|
84
|
+
def #{constant.downcase}
|
85
|
+
@options |= #{constant}
|
86
|
+
self
|
87
|
+
end
|
88
|
+
|
89
|
+
def no#{constant.downcase}
|
90
|
+
@options &= ~#{constant}
|
91
|
+
self
|
92
|
+
end
|
93
|
+
|
94
|
+
def #{constant.downcase}?
|
95
|
+
#{constant} & @options == #{constant}
|
96
|
+
end
|
97
|
+
}
|
98
|
+
end
|
99
|
+
|
100
|
+
def strict
|
101
|
+
@options &= ~RECOVER
|
102
|
+
self
|
103
|
+
end
|
104
|
+
|
105
|
+
def strict?
|
106
|
+
@options & RECOVER == STRICT
|
107
|
+
end
|
108
|
+
|
109
|
+
alias :to_i :options
|
110
|
+
|
111
|
+
def inspect
|
112
|
+
options = []
|
113
|
+
self.class.constants.each do |k|
|
114
|
+
options << k.downcase if send(:"#{k.downcase}?")
|
115
|
+
end
|
116
|
+
super.sub(/>$/, " " + options.join(', ') + ">")
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module PP
|
4
|
+
module CharacterData
|
5
|
+
def pretty_print pp # :nodoc:
|
6
|
+
nice_name = self.class.name.split('::').last
|
7
|
+
pp.group(2, "#(#{nice_name} ", ')') do
|
8
|
+
pp.pp text
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def inspect # :nodoc:
|
13
|
+
"#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module PP
|
4
|
+
module Node
|
5
|
+
def inspect # :nodoc:
|
6
|
+
attributes = inspect_attributes.reject { |x|
|
7
|
+
begin
|
8
|
+
attribute = send x
|
9
|
+
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
10
|
+
rescue NoMethodError
|
11
|
+
true
|
12
|
+
end
|
13
|
+
}.map { |attribute|
|
14
|
+
"#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
|
15
|
+
}.join ' '
|
16
|
+
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
|
17
|
+
end
|
18
|
+
|
19
|
+
def pretty_print pp # :nodoc:
|
20
|
+
nice_name = self.class.name.split('::').last
|
21
|
+
pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
|
22
|
+
|
23
|
+
pp.breakable
|
24
|
+
attrs = inspect_attributes.map { |t|
|
25
|
+
[t, send(t)] if respond_to?(t)
|
26
|
+
}.compact.find_all { |x|
|
27
|
+
if x.last
|
28
|
+
if [:attribute_nodes, :children].include? x.first
|
29
|
+
!x.last.empty?
|
30
|
+
else
|
31
|
+
true
|
32
|
+
end
|
33
|
+
end
|
34
|
+
}
|
35
|
+
|
36
|
+
pp.seplist(attrs) do |v|
|
37
|
+
if [:attribute_nodes, :children].include? v.first
|
38
|
+
pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
|
39
|
+
pp.breakable
|
40
|
+
pp.seplist(v.last) do |item|
|
41
|
+
pp.pp item
|
42
|
+
end
|
43
|
+
end
|
44
|
+
else
|
45
|
+
pp.text "#{v.first} = "
|
46
|
+
pp.pp v.last
|
47
|
+
end
|
48
|
+
end
|
49
|
+
pp.breakable
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,14 +1,112 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
|
+
###
|
4
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
5
|
+
# would move. The Reader is given an XML document, and yields nodes
|
6
|
+
# to an each block.
|
7
|
+
#
|
8
|
+
# Here is an example of usage:
|
9
|
+
#
|
10
|
+
# reader = Nokogiri::XML::Reader(<<-eoxml)
|
11
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
12
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
13
|
+
# </x>
|
14
|
+
# eoxml
|
15
|
+
#
|
16
|
+
# reader.each do |node|
|
17
|
+
#
|
18
|
+
# # node is an instance of Nokogiri::XML::Reader
|
19
|
+
# puts node.name
|
20
|
+
#
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# Note that Nokogiri::XML::Reader#each can only be called once!! Once
|
24
|
+
# the cursor moves through the entire document, you must parse the
|
25
|
+
# document again. So make sure that you capture any information you
|
26
|
+
# need during the first iteration.
|
27
|
+
#
|
28
|
+
# The Reader parser is good for when you need the speed of a SAX parser,
|
29
|
+
# but do not want to write a Document handler.
|
3
30
|
class Reader
|
4
31
|
include Enumerable
|
5
32
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
33
|
+
TYPE_NONE = 0
|
34
|
+
# Element node type
|
35
|
+
TYPE_ELEMENT = 1
|
36
|
+
# Attribute node type
|
37
|
+
TYPE_ATTRIBUTE = 2
|
38
|
+
# Text node type
|
39
|
+
TYPE_TEXT = 3
|
40
|
+
# CDATA node type
|
41
|
+
TYPE_CDATA = 4
|
42
|
+
# Entity Reference node type
|
43
|
+
TYPE_ENTITY_REFERENCE = 5
|
44
|
+
# Entity node type
|
45
|
+
TYPE_ENTITY = 6
|
46
|
+
# PI node type
|
47
|
+
TYPE_PROCESSING_INSTRUCTION = 7
|
48
|
+
# Comment node type
|
49
|
+
TYPE_COMMENT = 8
|
50
|
+
# Document node type
|
51
|
+
TYPE_DOCUMENT = 9
|
52
|
+
# Document Type node type
|
53
|
+
TYPE_DOCUMENT_TYPE = 10
|
54
|
+
# Document Fragment node type
|
55
|
+
TYPE_DOCUMENT_FRAGMENT = 11
|
56
|
+
# Notation node type
|
57
|
+
TYPE_NOTATION = 12
|
58
|
+
# Whitespace node type
|
59
|
+
TYPE_WHITESPACE = 13
|
60
|
+
# Significant Whitespace node type
|
61
|
+
TYPE_SIGNIFICANT_WHITESPACE = 14
|
62
|
+
# Element end node type
|
63
|
+
TYPE_END_ELEMENT = 15
|
64
|
+
# Entity end node type
|
65
|
+
TYPE_END_ENTITY = 16
|
66
|
+
# XML Declaration node type
|
67
|
+
TYPE_XML_DECLARATION = 17
|
68
|
+
|
69
|
+
# A list of errors encountered while parsing
|
70
|
+
attr_accessor :errors
|
71
|
+
|
72
|
+
# The encoding for the document
|
73
|
+
attr_reader :encoding
|
74
|
+
|
75
|
+
# The XML source
|
76
|
+
attr_reader :source
|
77
|
+
|
78
|
+
alias :self_closing? :empty_element?
|
79
|
+
|
80
|
+
def initialize source, url = nil, encoding = nil # :nodoc:
|
81
|
+
@source = source
|
82
|
+
@errors = []
|
83
|
+
@encoding = encoding
|
10
84
|
end
|
11
85
|
private :initialize
|
86
|
+
|
87
|
+
###
|
88
|
+
# Get a list of attributes for the current node.
|
89
|
+
def attributes
|
90
|
+
Hash[attribute_nodes.map { |node|
|
91
|
+
[node.name, node.to_s]
|
92
|
+
}].merge(namespaces || {})
|
93
|
+
end
|
94
|
+
|
95
|
+
###
|
96
|
+
# Get a list of attributes for the current node
|
97
|
+
def attribute_nodes
|
98
|
+
nodes = attr_nodes
|
99
|
+
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
100
|
+
nodes
|
101
|
+
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# Move the cursor through the document yielding the cursor to the block
|
105
|
+
def each
|
106
|
+
while cursor = self.read
|
107
|
+
yield cursor
|
108
|
+
end
|
109
|
+
end
|
12
110
|
end
|
13
111
|
end
|
14
112
|
end
|