nokogiri 1.5.10 → 1.10.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/LICENSE-DEPENDENCIES.md +1614 -0
- data/LICENSE.md +9 -0
- data/README.md +198 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +72 -0
- data/ext/nokogiri/extconf.rb +634 -92
- data/ext/nokogiri/html_document.c +8 -8
- data/ext/nokogiri/html_element_description.c +15 -15
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +20 -12
- data/ext/nokogiri/nokogiri.h +1 -44
- data/ext/nokogiri/xml_attr.c +34 -25
- data/ext/nokogiri/xml_cdata.c +12 -6
- data/ext/nokogiri/xml_comment.c +18 -3
- data/ext/nokogiri/xml_document.c +64 -32
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_io.c +11 -6
- data/ext/nokogiri/xml_namespace.c +50 -17
- data/ext/nokogiri/xml_namespace.h +3 -2
- data/ext/nokogiri/xml_node.c +459 -240
- data/ext/nokogiri/xml_node_set.c +166 -147
- data/ext/nokogiri/xml_node_set.h +2 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +6 -19
- data/ext/nokogiri/xml_sax_parser.c +11 -13
- data/ext/nokogiri/xml_sax_parser_context.c +41 -1
- data/ext/nokogiri/xml_sax_push_parser.c +56 -12
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +11 -5
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +17 -38
- data/ext/nokogiri/xslt_stylesheet.c +10 -10
- data/lib/nokogiri/css/node.rb +0 -50
- data/lib/nokogiri/css/parser.rb +263 -233
- data/lib/nokogiri/css/parser.y +54 -40
- data/lib/nokogiri/css/tokenizer.rb +104 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +78 -19
- data/lib/nokogiri/decorators/slop.rb +12 -5
- data/lib/nokogiri/html/document.rb +102 -21
- data/lib/nokogiri/html/document_fragment.rb +11 -3
- data/lib/nokogiri/html/sax/parser.rb +12 -2
- data/lib/nokogiri/html/sax/push_parser.rb +22 -2
- data/lib/nokogiri/version.rb +40 -22
- data/lib/nokogiri/xml/builder.rb +34 -31
- data/lib/nokogiri/xml/document.rb +20 -14
- data/lib/nokogiri/xml/document_fragment.rb +50 -2
- data/lib/nokogiri/xml/dtd.rb +14 -4
- data/lib/nokogiri/xml/entity_reference.rb +18 -0
- data/lib/nokogiri/xml/node.rb +148 -203
- data/lib/nokogiri/xml/node_set.rb +139 -123
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +7 -8
- data/lib/nokogiri/xml/searchable.rb +230 -0
- data/lib/nokogiri/xml/syntax_error.rb +24 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri.rb +40 -24
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
- data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
- metadata +252 -388
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -4,25 +4,26 @@ module Nokogiri
|
|
4
4
|
# The Slop decorator implements method missing such that methods may be
|
5
5
|
# used instead of XPath or CSS. See Nokogiri.Slop
|
6
6
|
module Slop
|
7
|
+
# The default XPath search context for Slop
|
8
|
+
XPATH_PREFIX = "./"
|
9
|
+
|
7
10
|
###
|
8
11
|
# look for node with +name+. See Nokogiri.Slop
|
9
12
|
def method_missing name, *args, &block
|
10
|
-
prefix = implied_xpath_context
|
11
|
-
|
12
13
|
if args.empty?
|
13
|
-
list = xpath("#{
|
14
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
14
15
|
elsif args.first.is_a? Hash
|
15
16
|
hash = args.first
|
16
17
|
if hash[:css]
|
17
18
|
list = css("#{name}#{hash[:css]}")
|
18
19
|
elsif hash[:xpath]
|
19
20
|
conds = Array(hash[:xpath]).join(' and ')
|
20
|
-
list = xpath("#{
|
21
|
+
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
21
22
|
end
|
22
23
|
else
|
23
24
|
CSS::Parser.without_cache do
|
24
25
|
list = xpath(
|
25
|
-
*CSS.xpath_for("#{name}#{args.first}", :prefix =>
|
26
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
|
26
27
|
)
|
27
28
|
end
|
28
29
|
end
|
@@ -30,6 +31,12 @@ module Nokogiri
|
|
30
31
|
super if list.empty?
|
31
32
|
list.length == 1 ? list.first : list
|
32
33
|
end
|
34
|
+
|
35
|
+
def respond_to_missing? name, include_private = false
|
36
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
37
|
+
|
38
|
+
!list.empty?
|
39
|
+
end
|
33
40
|
end
|
34
41
|
end
|
35
42
|
end
|
@@ -5,24 +5,59 @@ module Nokogiri
|
|
5
5
|
# Get the meta tag encoding for this document. If there is no meta tag,
|
6
6
|
# then nil is returned.
|
7
7
|
def meta_encoding
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
case
|
9
|
+
when meta = at('//meta[@charset]')
|
10
|
+
meta[:charset]
|
11
|
+
when meta = meta_content_type
|
12
|
+
meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
|
13
|
+
end
|
11
14
|
end
|
12
15
|
|
13
16
|
###
|
14
|
-
# Set the meta tag encoding for this document.
|
15
|
-
#
|
17
|
+
# Set the meta tag encoding for this document.
|
18
|
+
#
|
19
|
+
# If a meta encoding tag is already present, its content is
|
20
|
+
# replaced with the given text.
|
21
|
+
#
|
22
|
+
# Otherwise, this method tries to create one at an appropriate
|
23
|
+
# place supplying head and/or html elements as necessary, which
|
24
|
+
# is inside a head element if any, and before any text node or
|
25
|
+
# content element (typically <body>) if any.
|
26
|
+
#
|
27
|
+
# The result when trying to set an encoding that is different
|
28
|
+
# from the document encoding is undefined.
|
29
|
+
#
|
30
|
+
# Beware that in CRuby, libxml2 automatically inserts a meta tag
|
31
|
+
# into a head element.
|
16
32
|
def meta_encoding= encoding
|
17
|
-
|
18
|
-
|
33
|
+
case
|
34
|
+
when meta = meta_content_type
|
35
|
+
meta['content'] = 'text/html; charset=%s' % encoding
|
36
|
+
encoding
|
37
|
+
when meta = at('//meta[@charset]')
|
38
|
+
meta['charset'] = encoding
|
39
|
+
else
|
40
|
+
meta = XML::Node.new('meta', self)
|
41
|
+
if dtd = internal_subset and dtd.html5_dtd?
|
42
|
+
meta['charset'] = encoding
|
43
|
+
else
|
44
|
+
meta['http-equiv'] = 'Content-Type'
|
45
|
+
meta['content'] = 'text/html; charset=%s' % encoding
|
46
|
+
end
|
47
|
+
|
48
|
+
case
|
49
|
+
when head = at('//head')
|
50
|
+
head.prepend_child(meta)
|
51
|
+
else
|
52
|
+
set_metadata_element(meta)
|
53
|
+
end
|
54
|
+
encoding
|
55
|
+
end
|
19
56
|
end
|
20
57
|
|
21
58
|
def meta_content_type
|
22
|
-
|
23
|
-
node['http-equiv'] =~ /\AContent-Type\z/i
|
24
|
-
!node['content'].nil? and
|
25
|
-
!node['content'].empty?
|
59
|
+
xpath('//meta[@http-equiv and boolean(@content)]').find { |node|
|
60
|
+
node['http-equiv'] =~ /\AContent-Type\z/i
|
26
61
|
}
|
27
62
|
end
|
28
63
|
private :meta_content_type
|
@@ -31,20 +66,64 @@ module Nokogiri
|
|
31
66
|
# Get the title string of this document. Return nil if there is
|
32
67
|
# no title tag.
|
33
68
|
def title
|
34
|
-
title = at('title') and title.inner_text
|
69
|
+
title = at('//title') and title.inner_text
|
35
70
|
end
|
36
71
|
|
37
72
|
###
|
38
|
-
# Set the title string of this document.
|
39
|
-
#
|
73
|
+
# Set the title string of this document.
|
74
|
+
#
|
75
|
+
# If a title element is already present, its content is replaced
|
76
|
+
# with the given text.
|
77
|
+
#
|
78
|
+
# Otherwise, this method tries to create one at an appropriate
|
79
|
+
# place supplying head and/or html elements as necessary, which
|
80
|
+
# is inside a head element if any, right after a meta
|
81
|
+
# encoding/charset tag if any, and before any text node or
|
82
|
+
# content element (typically <body>) if any.
|
40
83
|
def title=(text)
|
41
|
-
|
42
|
-
|
43
|
-
title =
|
84
|
+
tnode = XML::Text.new(text, self)
|
85
|
+
if title = at('//title')
|
86
|
+
title.children = tnode
|
87
|
+
return text
|
88
|
+
end
|
89
|
+
|
90
|
+
title = XML::Node.new('title', self) << tnode
|
91
|
+
case
|
92
|
+
when head = at('//head')
|
44
93
|
head << title
|
94
|
+
when meta = at('//meta[@charset]') || meta_content_type
|
95
|
+
# better put after charset declaration
|
96
|
+
meta.add_next_sibling(title)
|
97
|
+
else
|
98
|
+
set_metadata_element(title)
|
99
|
+
end
|
100
|
+
text
|
101
|
+
end
|
102
|
+
|
103
|
+
def set_metadata_element(element)
|
104
|
+
case
|
105
|
+
when head = at('//head')
|
106
|
+
head << element
|
107
|
+
when html = at('//html')
|
108
|
+
head = html.prepend_child(XML::Node.new('head', self))
|
109
|
+
head.prepend_child(element)
|
110
|
+
when first = children.find { |node|
|
111
|
+
case node
|
112
|
+
when XML::Element, XML::Text
|
113
|
+
true
|
114
|
+
end
|
115
|
+
}
|
116
|
+
# We reach here only if the underlying document model
|
117
|
+
# allows <html>/<head> elements to be omitted and does not
|
118
|
+
# automatically supply them.
|
119
|
+
first.add_previous_sibling(element)
|
120
|
+
else
|
121
|
+
html = add_child(XML::Node.new('html', self))
|
122
|
+
head = html.add_child(XML::Node.new('head', self))
|
123
|
+
head.prepend_child(element)
|
45
124
|
end
|
46
|
-
title.children = XML::Text.new(text, self)
|
47
125
|
end
|
126
|
+
private :set_metadata_element
|
48
127
|
|
49
128
|
####
|
50
129
|
# Serialize Node using +options+. Save options can also be set using a
|
@@ -82,7 +161,7 @@ module Nokogiri
|
|
82
161
|
# Nokogiri::XML::ParseOptions.
|
83
162
|
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
84
163
|
|
85
|
-
options = Nokogiri::XML::ParseOptions.new(options) if
|
164
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
86
165
|
# Give the options to the user
|
87
166
|
yield options if block_given?
|
88
167
|
|
@@ -94,7 +173,7 @@ module Nokogiri
|
|
94
173
|
|
95
174
|
if string_or_io.respond_to?(:read)
|
96
175
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
97
|
-
|
176
|
+
unless encoding
|
98
177
|
# Libxml2's parser has poor support for encoding
|
99
178
|
# detection. First, it does not recognize the HTML5
|
100
179
|
# style meta charset declaration. Secondly, even if it
|
@@ -117,7 +196,9 @@ module Nokogiri
|
|
117
196
|
end
|
118
197
|
|
119
198
|
# read_memory pukes on empty docs
|
120
|
-
|
199
|
+
if string_or_io.nil? or string_or_io.empty?
|
200
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
201
|
+
end
|
121
202
|
|
122
203
|
encoding ||= EncodingReader.detect_encoding(string_or_io)
|
123
204
|
|
@@ -1,14 +1,22 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
3
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
4
|
-
attr_accessor :errors
|
5
|
-
|
6
4
|
####
|
7
5
|
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
|
8
6
|
def self.parse tags, encoding = nil
|
9
7
|
doc = HTML::Document.new
|
10
8
|
|
11
|
-
encoding ||= tags.respond_to?(:encoding)
|
9
|
+
encoding ||= if tags.respond_to?(:encoding)
|
10
|
+
encoding = tags.encoding
|
11
|
+
if encoding == ::Encoding::ASCII_8BIT
|
12
|
+
'UTF-8'
|
13
|
+
else
|
14
|
+
encoding.name
|
15
|
+
end
|
16
|
+
else
|
17
|
+
'UTF-8'
|
18
|
+
end
|
19
|
+
|
12
20
|
doc.encoding = encoding
|
13
21
|
|
14
22
|
new(doc, tags)
|
@@ -22,7 +22,7 @@ module Nokogiri
|
|
22
22
|
# end
|
23
23
|
#
|
24
24
|
# parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
|
25
|
-
# parser.parse(File.read(ARGV[0], 'rb'))
|
25
|
+
# parser.parse(File.read(ARGV[0], mode: 'rb'))
|
26
26
|
#
|
27
27
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
28
28
|
class Parser < Nokogiri::XML::SAX::Parser
|
@@ -36,11 +36,21 @@ module Nokogiri
|
|
36
36
|
ctx.parse_with self
|
37
37
|
end
|
38
38
|
|
39
|
+
###
|
40
|
+
# Parse given +io+
|
41
|
+
def parse_io io, encoding = 'UTF-8'
|
42
|
+
check_encoding(encoding)
|
43
|
+
@encoding = encoding
|
44
|
+
ctx = ParserContext.io(io, ENCODINGS[encoding])
|
45
|
+
yield ctx if block_given?
|
46
|
+
ctx.parse_with self
|
47
|
+
end
|
48
|
+
|
39
49
|
###
|
40
50
|
# Parse a file with +filename+
|
41
51
|
def parse_file filename, encoding = 'UTF-8'
|
42
52
|
raise ArgumentError unless filename
|
43
|
-
raise Errno::ENOENT unless File.
|
53
|
+
raise Errno::ENOENT unless File.exist?(filename)
|
44
54
|
raise Errno::EISDIR if File.directory?(filename)
|
45
55
|
ctx = ParserContext.file(filename, encoding)
|
46
56
|
yield ctx if block_given?
|
@@ -2,13 +2,33 @@ module Nokogiri
|
|
2
2
|
module HTML
|
3
3
|
module SAX
|
4
4
|
class PushParser
|
5
|
-
|
5
|
+
|
6
|
+
# The Nokogiri::HTML::SAX::Document on which the PushParser will be
|
7
|
+
# operating
|
8
|
+
attr_accessor :document
|
9
|
+
|
10
|
+
def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
|
6
11
|
@document = doc
|
7
12
|
@encoding = encoding
|
8
13
|
@sax_parser = HTML::SAX::Parser.new(doc, @encoding)
|
9
14
|
|
10
15
|
## Create our push parser context
|
11
|
-
initialize_native(@sax_parser, file_name,
|
16
|
+
initialize_native(@sax_parser, file_name, encoding)
|
17
|
+
end
|
18
|
+
|
19
|
+
###
|
20
|
+
# Write a +chunk+ of HTML to the PushParser. Any callback methods
|
21
|
+
# that can be called will be called immediately.
|
22
|
+
def write chunk, last_chunk = false
|
23
|
+
native_write(chunk, last_chunk)
|
24
|
+
end
|
25
|
+
alias :<< :write
|
26
|
+
|
27
|
+
###
|
28
|
+
# Finish the parsing. This method is only necessary for
|
29
|
+
# Nokogiri::HTML::SAX::Document#end_document to be called.
|
30
|
+
def finish
|
31
|
+
write '', true
|
12
32
|
end
|
13
33
|
end
|
14
34
|
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
# The version of Nokogiri you are using
|
3
|
-
VERSION =
|
3
|
+
VERSION = "1.10.4"
|
4
4
|
|
5
5
|
class VersionInfo # :nodoc:
|
6
6
|
def jruby?
|
@@ -8,13 +8,14 @@ module Nokogiri
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def engine
|
11
|
-
defined?(RUBY_ENGINE) ? RUBY_ENGINE :
|
11
|
+
defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
|
12
12
|
end
|
13
13
|
|
14
14
|
def loaded_parser_version
|
15
|
-
LIBXML_PARSER_VERSION.
|
16
|
-
|
17
|
-
|
15
|
+
LIBXML_PARSER_VERSION.
|
16
|
+
scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
|
17
|
+
collect(&:to_i).
|
18
|
+
join(".")
|
18
19
|
end
|
19
20
|
|
20
21
|
def compiled_parser_version
|
@@ -25,6 +26,14 @@ module Nokogiri
|
|
25
26
|
defined?(LIBXML_VERSION)
|
26
27
|
end
|
27
28
|
|
29
|
+
def libxml2_using_system?
|
30
|
+
!libxml2_using_packaged?
|
31
|
+
end
|
32
|
+
|
33
|
+
def libxml2_using_packaged?
|
34
|
+
NOKOGIRI_USE_PACKAGED_LIBRARIES
|
35
|
+
end
|
36
|
+
|
28
37
|
def warnings
|
29
38
|
return [] unless libxml2?
|
30
39
|
|
@@ -37,24 +46,33 @@ module Nokogiri
|
|
37
46
|
|
38
47
|
def to_hash
|
39
48
|
hash_info = {}
|
40
|
-
hash_info[
|
41
|
-
hash_info[
|
42
|
-
hash_info[
|
43
|
-
hash_info[
|
44
|
-
hash_info[
|
45
|
-
hash_info[
|
46
|
-
hash_info[
|
47
|
-
hash_info[
|
49
|
+
hash_info["warnings"] = []
|
50
|
+
hash_info["nokogiri"] = Nokogiri::VERSION
|
51
|
+
hash_info["ruby"] = {}
|
52
|
+
hash_info["ruby"]["version"] = ::RUBY_VERSION
|
53
|
+
hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
|
54
|
+
hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
|
55
|
+
hash_info["ruby"]["engine"] = engine
|
56
|
+
hash_info["ruby"]["jruby"] = jruby? if jruby?
|
48
57
|
|
49
58
|
if libxml2?
|
50
|
-
hash_info[
|
51
|
-
hash_info[
|
52
|
-
|
53
|
-
|
54
|
-
|
59
|
+
hash_info["libxml"] = {}
|
60
|
+
hash_info["libxml"]["binding"] = "extension"
|
61
|
+
if libxml2_using_packaged?
|
62
|
+
hash_info["libxml"]["source"] = "packaged"
|
63
|
+
hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
|
64
|
+
hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
|
65
|
+
hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
|
66
|
+
hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
|
67
|
+
else
|
68
|
+
hash_info["libxml"]["source"] = "system"
|
69
|
+
end
|
70
|
+
hash_info["libxml"]["compiled"] = compiled_parser_version
|
71
|
+
hash_info["libxml"]["loaded"] = loaded_parser_version
|
72
|
+
hash_info["warnings"] = warnings
|
55
73
|
elsif jruby?
|
56
|
-
hash_info[
|
57
|
-
hash_info[
|
74
|
+
hash_info["xerces"] = Nokogiri::XERCES_VERSION
|
75
|
+
hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
|
58
76
|
end
|
59
77
|
|
60
78
|
hash_info
|
@@ -62,10 +80,10 @@ module Nokogiri
|
|
62
80
|
|
63
81
|
def to_markdown
|
64
82
|
begin
|
65
|
-
require
|
83
|
+
require "psych"
|
66
84
|
rescue LoadError
|
67
85
|
end
|
68
|
-
require
|
86
|
+
require "yaml"
|
69
87
|
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
70
88
|
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
71
89
|
end
|
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -213,7 +213,7 @@ module Nokogiri
|
|
213
213
|
# xml.foo
|
214
214
|
# end
|
215
215
|
# end
|
216
|
-
#
|
216
|
+
#
|
217
217
|
# puts builder.to_xml
|
218
218
|
#
|
219
219
|
# Will output this xml:
|
@@ -250,7 +250,7 @@ module Nokogiri
|
|
250
250
|
# xml.awesome # add the "awesome" tag below "some_tag"
|
251
251
|
# end
|
252
252
|
#
|
253
|
-
def self.with
|
253
|
+
def self.with(root, &block)
|
254
254
|
new({}, root, &block)
|
255
255
|
end
|
256
256
|
|
@@ -263,23 +263,25 @@ module Nokogiri
|
|
263
263
|
# Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
|
264
264
|
# ...
|
265
265
|
# end
|
266
|
-
def initialize
|
267
|
-
|
266
|
+
def initialize(options = {}, root = nil, &block)
|
268
267
|
if root
|
269
|
-
@doc
|
268
|
+
@doc = root.document
|
270
269
|
@parent = root
|
271
270
|
else
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
271
|
+
klassname = "::" + (self.class.name.split("::")[0..-2] + ["Document"]).join("::")
|
272
|
+
klass = begin
|
273
|
+
Object.const_get(klassname)
|
274
|
+
rescue NameError
|
275
|
+
Nokogiri::XML::Document
|
276
|
+
end
|
277
|
+
@parent = @doc = klass.new
|
276
278
|
end
|
277
279
|
|
278
|
-
@context
|
279
|
-
@arity
|
280
|
-
@ns
|
280
|
+
@context = nil
|
281
|
+
@arity = nil
|
282
|
+
@ns = nil
|
281
283
|
|
282
|
-
options.each do |k,v|
|
284
|
+
options.each do |k, v|
|
283
285
|
@doc.send(:"#{k}=", v)
|
284
286
|
end
|
285
287
|
|
@@ -287,7 +289,7 @@ module Nokogiri
|
|
287
289
|
|
288
290
|
@arity = block.arity
|
289
291
|
if @arity <= 0
|
290
|
-
@context = eval(
|
292
|
+
@context = eval("self", block.binding)
|
291
293
|
instance_eval(&block)
|
292
294
|
else
|
293
295
|
yield self
|
@@ -298,26 +300,26 @@ module Nokogiri
|
|
298
300
|
|
299
301
|
###
|
300
302
|
# Create a Text Node with content of +string+
|
301
|
-
def text
|
303
|
+
def text(string)
|
302
304
|
insert @doc.create_text_node(string)
|
303
305
|
end
|
304
306
|
|
305
307
|
###
|
306
308
|
# Create a CDATA Node with content of +string+
|
307
|
-
def cdata
|
309
|
+
def cdata(string)
|
308
310
|
insert doc.create_cdata(string)
|
309
311
|
end
|
310
312
|
|
311
313
|
###
|
312
314
|
# Create a Comment Node with content of +string+
|
313
|
-
def comment
|
315
|
+
def comment(string)
|
314
316
|
insert doc.create_comment(string)
|
315
317
|
end
|
316
318
|
|
317
319
|
###
|
318
320
|
# Build a tag that is associated with namespace +ns+. Raises an
|
319
321
|
# ArgumentError if +ns+ has not been defined higher in the tree.
|
320
|
-
def []
|
322
|
+
def [](ns)
|
321
323
|
if @parent != @doc
|
322
324
|
@ns = @parent.namespace_definitions.find { |x| x.prefix == ns.to_s }
|
323
325
|
end
|
@@ -348,15 +350,15 @@ module Nokogiri
|
|
348
350
|
|
349
351
|
###
|
350
352
|
# Append the given raw XML +string+ to the document
|
351
|
-
def <<
|
353
|
+
def <<(string)
|
352
354
|
@doc.fragment(string).children.each { |x| insert(x) }
|
353
355
|
end
|
354
356
|
|
355
|
-
def method_missing
|
357
|
+
def method_missing(method, *args, &block) # :nodoc:
|
356
358
|
if @context && @context.respond_to?(method)
|
357
359
|
@context.send(method, *args, &block)
|
358
360
|
else
|
359
|
-
node = @doc.create_element(method.to_s.sub(/[_!]$/,
|
361
|
+
node = @doc.create_element(method.to_s.sub(/[_!]$/, ""), *args) { |n|
|
360
362
|
# Set up the namespace
|
361
363
|
if @ns.is_a? Nokogiri::XML::Namespace
|
362
364
|
n.namespace = @ns
|
@@ -377,13 +379,14 @@ module Nokogiri
|
|
377
379
|
end
|
378
380
|
|
379
381
|
private
|
382
|
+
|
380
383
|
###
|
381
384
|
# Insert +node+ as a child of the current Node
|
382
385
|
def insert(node, &block)
|
383
|
-
node
|
386
|
+
node = @parent.add_child(node)
|
384
387
|
if block_given?
|
385
388
|
old_parent = @parent
|
386
|
-
@parent
|
389
|
+
@parent = node
|
387
390
|
@arity ||= block.arity
|
388
391
|
if @arity <= 0
|
389
392
|
instance_eval(&block)
|
@@ -396,16 +399,16 @@ module Nokogiri
|
|
396
399
|
end
|
397
400
|
|
398
401
|
class NodeBuilder # :nodoc:
|
399
|
-
def initialize
|
402
|
+
def initialize(node, doc_builder)
|
400
403
|
@node = node
|
401
404
|
@doc_builder = doc_builder
|
402
405
|
end
|
403
406
|
|
404
|
-
def []=
|
407
|
+
def []=(k, v)
|
405
408
|
@node[k] = v
|
406
409
|
end
|
407
410
|
|
408
|
-
def []
|
411
|
+
def [](k)
|
409
412
|
@node[k]
|
410
413
|
end
|
411
414
|
|
@@ -413,19 +416,19 @@ module Nokogiri
|
|
413
416
|
opts = args.last.is_a?(Hash) ? args.pop : {}
|
414
417
|
case method.to_s
|
415
418
|
when /^(.*)!$/
|
416
|
-
@node[
|
419
|
+
@node["id"] = $1
|
417
420
|
@node.content = args.first if args.first
|
418
421
|
when /^(.*)=/
|
419
422
|
@node[$1] = args.first
|
420
423
|
else
|
421
|
-
@node[
|
422
|
-
((@node[
|
424
|
+
@node["class"] =
|
425
|
+
((@node["class"] || "").split(/\s/) + [method.to_s]).join(" ")
|
423
426
|
@node.content = args.first if args.first
|
424
427
|
end
|
425
428
|
|
426
429
|
# Assign any extra options
|
427
|
-
opts.each do |k,v|
|
428
|
-
@node[k.to_s] = ((@node[k.to_s] ||
|
430
|
+
opts.each do |k, v|
|
431
|
+
@node[k.to_s] = ((@node[k.to_s] || "").split(/\s/) + [v]).join(" ")
|
429
432
|
end
|
430
433
|
|
431
434
|
if block_given?
|
@@ -5,8 +5,8 @@ module Nokogiri
|
|
5
5
|
# XML documents. The Document is created by parsing an XML document.
|
6
6
|
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
7
7
|
#
|
8
|
-
# For searching a Document, see Nokogiri::XML::
|
9
|
-
# Nokogiri::XML::
|
8
|
+
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
9
|
+
# Nokogiri::XML::Searchable#xpath
|
10
10
|
#
|
11
11
|
class Document < Nokogiri::XML::Node
|
12
12
|
# I'm ignoring unicode characters here.
|
@@ -33,19 +33,26 @@ module Nokogiri
|
|
33
33
|
# +block+ (optional) is passed a configuration object on which
|
34
34
|
# parse options may be set.
|
35
35
|
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
36
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
37
|
+
# does not attempt to load DTDs or access the network. See
|
38
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
39
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
40
|
+
# set) by default.
|
40
41
|
#
|
41
42
|
# Nokogiri.XML() is a convenience method which will call this method.
|
42
43
|
#
|
43
|
-
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
44
|
-
options = Nokogiri::XML::ParseOptions.new(options) if
|
44
|
+
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
45
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
45
46
|
# Give the options to the user
|
46
47
|
yield options if block_given?
|
47
48
|
|
48
|
-
|
49
|
+
if empty_doc?(string_or_io)
|
50
|
+
if options.strict?
|
51
|
+
raise Nokogiri::XML::SyntaxError.new("Empty document")
|
52
|
+
else
|
53
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
54
|
+
end
|
55
|
+
end
|
49
56
|
|
50
57
|
doc = if string_or_io.respond_to?(:read)
|
51
58
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
@@ -233,10 +240,10 @@ module Nokogiri
|
|
233
240
|
undef_method :namespace_definitions, :line, :add_namespace
|
234
241
|
|
235
242
|
def add_child node_or_tags
|
236
|
-
raise "
|
243
|
+
raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
237
244
|
node_or_tags = coerce(node_or_tags)
|
238
245
|
if node_or_tags.is_a?(XML::NodeSet)
|
239
|
-
raise "
|
246
|
+
raise "A document may not have multiple root nodes." if node_or_tags.size > 1
|
240
247
|
super(node_or_tags.first)
|
241
248
|
else
|
242
249
|
super
|
@@ -267,9 +274,8 @@ module Nokogiri
|
|
267
274
|
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
268
275
|
end
|
269
276
|
|
270
|
-
|
271
|
-
|
272
|
-
end
|
277
|
+
# @private
|
278
|
+
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
273
279
|
|
274
280
|
def inspect_attributes
|
275
281
|
[:name, :children]
|