nokogiri 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +28 -8
- data/CHANGELOG.rdoc +24 -1
- data/Manifest.txt +2 -1
- data/README.ja.rdoc +1 -1
- data/README.rdoc +22 -4
- data/Rakefile +6 -2
- data/ext/nokogiri/extconf.rb +55 -32
- data/ext/nokogiri/nokogiri.h +2 -0
- data/ext/nokogiri/xml_document.c +5 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_node.c +58 -12
- data/ext/nokogiri/xml_node_set.c +7 -7
- data/ext/nokogiri/xml_reader.c +20 -1
- data/ext/nokogiri/xml_xpath_context.c +2 -0
- data/lib/nokogiri/css/generated_parser.rb +155 -148
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -1
- data/lib/nokogiri/css/parser.y +3 -0
- data/lib/nokogiri/css/xpath_visitor.rb +1 -7
- data/lib/nokogiri/ffi/libxml.rb +29 -4
- data/lib/nokogiri/ffi/xml/document.rb +4 -0
- data/lib/nokogiri/ffi/xml/node.rb +27 -19
- data/lib/nokogiri/ffi/xml/node_set.rb +3 -3
- data/lib/nokogiri/ffi/xml/reader.rb +4 -0
- data/lib/nokogiri/html.rb +2 -2
- data/lib/nokogiri/html/document_fragment.rb +7 -4
- data/lib/nokogiri/version.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +1 -2
- data/lib/nokogiri/xml/document_fragment.rb +7 -0
- data/lib/nokogiri/xml/node.rb +4 -2
- data/lib/nokogiri/xml/node_set.rb +25 -0
- data/lib/nokogiri/xml/reader.rb +2 -0
- data/lib/nokogiri/xml/sax/document.rb +3 -1
- data/test/css/test_parser.rb +11 -1
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +2 -2
- data/test/html/test_document_fragment.rb +34 -6
- data/test/test_memory_leak.rb +2 -2
- data/test/test_reader.rb +28 -6
- data/test/test_xslt_transforms.rb +29 -28
- data/test/xml/test_attr.rb +31 -4
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +8 -8
- data/test/xml/test_document_fragment.rb +2 -2
- data/test/xml/test_node.rb +1 -1
- data/test/xml/test_node_reparenting.rb +26 -11
- data/test/xml/test_node_set.rb +38 -2
- data/test/xml/test_text.rb +11 -2
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +78 -11
- metadata +24 -5
- data/lib/nokogiri/version_warning.rb +0 -14
data/CHANGELOG.ja.rdoc
CHANGED
@@ -1,8 +1,28 @@
|
|
1
|
-
=== 1.4.
|
1
|
+
=== 1.4.3 2010年7月28日
|
2
2
|
|
3
3
|
* 新しい機能
|
4
4
|
|
5
|
-
* XML::
|
5
|
+
* XML::Reader#empty_element? - 子の無いエレメントにtrueを返す #262
|
6
|
+
* Node#remove_namespaces! - 1.4.2では 名前空間のみを取り除いていたが、
|
7
|
+
1.4.3 では名前空間及び、名前空間宣言も取り除く #294
|
8
|
+
|
9
|
+
* バグの修正
|
10
|
+
|
11
|
+
* XML::NodeSet#{include?,delete,push} はXML::Namespaceを受入れる
|
12
|
+
* XML::Document#parse - 1.4.3より文書内の文脈を解析する機能を追加
|
13
|
+
* XML::DocumentFragment#inner_html= 文脈解析を共に実行する #298, #281
|
14
|
+
* lib/nokogiri/css/parser.y はCSSと疑似選別の両方を機能
|
15
|
+
* 演算によって近隣に存在する併合型ノードへの遊離問題の有無に関わらず、一切の
|
16
|
+
弊害なしにテキストノードの繰り返しが実行可能 #283
|
17
|
+
* xmlFirstElementChild et al.による libxml2バージョンでの不適合性を修正 #303
|
18
|
+
* XML::Attr#add_namespace (!)文字通りの機能実現! #252
|
19
|
+
* HTML::DocumentFragment が文字列に存在するエンコードを使用 #305
|
20
|
+
|
21
|
+
=== 1.4.2 2010年5月22日
|
22
|
+
|
23
|
+
* 新機能
|
24
|
+
|
25
|
+
* XML::Node#parse 定義されたコンテキストノードでXML 又はHTMLのフラグメント
|
6
26
|
を解析する
|
7
27
|
* XML::Node#namespacesが子ノードとその祖先ノード内で定義された全ての名前空間
|
8
28
|
を返すようになった(以前は祖先ノードの名前空間は返されなかった)
|
@@ -30,18 +50,18 @@
|
|
30
50
|
* XML::Node#xpath はNodeSetのオブジェクト以外のオブジェクトを返す GH #208
|
31
51
|
* XSLT::StyleSheet#transformはパラメーターのハッシュを受け入れる GH #223
|
32
52
|
* CSSのnot()の疑似セレクタの修正 GH #205
|
33
|
-
* XML::Builder
|
53
|
+
* XML::Builder はノードらが切り離されても破壊しない(vihaiの協力に感謝)
|
34
54
|
GH #228
|
35
55
|
* SAX parser経由でエンコードを強制することが出来る Eugene Pimenovに感謝!
|
36
56
|
GH #204
|
37
|
-
* XML::DocumentFragment はML::Node#parse
|
38
|
-
* XML Reader
|
57
|
+
* XML::DocumentFragment はXML::Node#parse を使用して子を限定する
|
58
|
+
* XML Reader内のメモリリーク修正 sdorさん、ありがとう! GH#244
|
39
59
|
|
40
60
|
* ノート
|
41
61
|
|
42
|
-
* 今日4月18日現在、Windows gems は libxml 2.7.
|
43
|
-
1.1.26にDLLs
|
44
|
-
2.7.3 と libxslt 1.1.24
|
62
|
+
* 今日4月18日現在、Windows gems は libxml 2.7.7 とlibxslt
|
63
|
+
1.1.26にDLLsを標準装備しています。このリリース以前にも既にDLLsはlibxml
|
64
|
+
2.7.3 と libxslt 1.1.24に標準装備済み。
|
45
65
|
|
46
66
|
=== 1.4.1 2009年12月10日
|
47
67
|
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,4 +1,27 @@
|
|
1
|
-
=== 1.4.
|
1
|
+
=== 1.4.3 / 2010/07/28
|
2
|
+
|
3
|
+
* New Features
|
4
|
+
|
5
|
+
* XML::Reader#empty_element? returns true for empty elements. #262
|
6
|
+
* Node#remove_namespaces! now removes namespace *declarations* as well. #294
|
7
|
+
* NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding
|
8
|
+
methods of Node do.
|
9
|
+
|
10
|
+
* Bugfixes
|
11
|
+
|
12
|
+
* XML::NodeSet#{include?,delete,push} accept an XML::Namespace
|
13
|
+
* XML::Document#parse added for parsing in the context of a document
|
14
|
+
* XML::DocumentFragment#inner_html= works with contextual parsing! #298, #281
|
15
|
+
* lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed
|
16
|
+
* Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283
|
17
|
+
* Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303
|
18
|
+
* XML::Attr#add_namespace now works as expected. #252
|
19
|
+
* HTML::DocumentFragment uses the string's encoding. #305
|
20
|
+
* Fix the CSS3 selector translation rule for the general sibling combinator
|
21
|
+
(a.k.a. preceding selector) that incorrectly converted "E ~ F G" to
|
22
|
+
"//F//G[preceding-sibling::E]".
|
23
|
+
|
24
|
+
=== 1.4.2 / 2010/05/22
|
2
25
|
|
3
26
|
* New Features
|
4
27
|
|
data/Manifest.txt
CHANGED
@@ -44,6 +44,8 @@ ext/nokogiri/xml_entity_reference.c
|
|
44
44
|
ext/nokogiri/xml_entity_reference.h
|
45
45
|
ext/nokogiri/xml_io.c
|
46
46
|
ext/nokogiri/xml_io.h
|
47
|
+
ext/nokogiri/xml_libxml2_hacks.c
|
48
|
+
ext/nokogiri/xml_libxml2_hacks.h
|
47
49
|
ext/nokogiri/xml_namespace.c
|
48
50
|
ext/nokogiri/xml_namespace.h
|
49
51
|
ext/nokogiri/xml_node.c
|
@@ -157,7 +159,6 @@ lib/nokogiri/html/sax/parser.rb
|
|
157
159
|
lib/nokogiri/html/sax/parser_context.rb
|
158
160
|
lib/nokogiri/syntax_error.rb
|
159
161
|
lib/nokogiri/version.rb
|
160
|
-
lib/nokogiri/version_warning.rb
|
161
162
|
lib/nokogiri/xml.rb
|
162
163
|
lib/nokogiri/xml/attr.rb
|
163
164
|
lib/nokogiri/xml/attribute_decl.rb
|
data/README.ja.rdoc
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
* http://nokogiri.org/
|
4
4
|
* http://github.com/tenderlove/nokogiri/wikis
|
5
5
|
* http://github.com/tenderlove/nokogiri/tree/master
|
6
|
-
* http://groups.google.com/group/nokogiri-
|
6
|
+
* http://groups.google.com/group/nokogiri-list
|
7
7
|
* http://github.com/tenderlove/nokogiri/issues
|
8
8
|
|
9
9
|
== DESCRIPTION:
|
data/README.rdoc
CHANGED
@@ -81,9 +81,27 @@ The IRC channel is #nokogiri on freenode.
|
|
81
81
|
|
82
82
|
== ENCODING:
|
83
83
|
|
84
|
-
Strings are always stored as UTF-8 internally. Methods that return
|
85
|
-
will always return UTF-8 encoded strings. Methods that
|
86
|
-
to_html and inner_html) will return a string
|
84
|
+
Strings are always stored as UTF-8 internally. Methods that return
|
85
|
+
text values will always return UTF-8 encoded strings. Methods that
|
86
|
+
return XML (like to_xml, to_html and inner_html) will return a string
|
87
|
+
encoded like the source document.
|
88
|
+
|
89
|
+
*WARNING*
|
90
|
+
|
91
|
+
Some documents declare one particular encoding, but use a different
|
92
|
+
one. So, which encoding should the parser choose?
|
93
|
+
|
94
|
+
Remember that data is just a stream of bytes. Only we humans add
|
95
|
+
meaning to that stream. Any particular set of bytes could be valid
|
96
|
+
characters in multiple encodings, so detecting encoding with 100%
|
97
|
+
accuracy is not possible. libxml2 does its best, but it can't be right
|
98
|
+
100% of the time.
|
99
|
+
|
100
|
+
If you want Nokogiri to handle the document encoding properly, your
|
101
|
+
best bet is to explicitly set the encoding. Here is an example of
|
102
|
+
explicitly setting the encoding to EUC-JP on the parser:
|
103
|
+
|
104
|
+
doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
|
87
105
|
|
88
106
|
== INSTALL:
|
89
107
|
|
@@ -94,7 +112,7 @@ to_html and inner_html) will return a string encoded like the source document.
|
|
94
112
|
Binary packages are available for:
|
95
113
|
|
96
114
|
* SuSE[http://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
|
97
|
-
* Fedora[
|
115
|
+
* Fedora[http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756]
|
98
116
|
|
99
117
|
== DEVELOPMENT:
|
100
118
|
|
data/Rakefile
CHANGED
@@ -112,7 +112,7 @@ end
|
|
112
112
|
|
113
113
|
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
|
114
114
|
begin
|
115
|
-
racc = `which racc`.strip
|
115
|
+
racc = Config::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
|
116
116
|
racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
|
117
117
|
sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
|
118
118
|
rescue
|
@@ -129,7 +129,11 @@ file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
|
|
129
129
|
end
|
130
130
|
|
131
131
|
require 'tasks/test'
|
132
|
-
|
132
|
+
begin
|
133
|
+
require 'tasks/cross_compile' unless java
|
134
|
+
rescue RuntimeError => e
|
135
|
+
warn "WARNING: Could not perform some cross-compiling: #{e}"
|
136
|
+
end
|
133
137
|
|
134
138
|
desc "set environment variables to build and/or test with debug options"
|
135
139
|
task :debug do
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -17,7 +17,7 @@ end
|
|
17
17
|
$CFLAGS << " #{ENV["CFLAGS"]}"
|
18
18
|
$LIBS << " #{ENV["LIBS"]}"
|
19
19
|
|
20
|
-
if Config::CONFIG['target_os'] == 'mingw32'
|
20
|
+
if Config::CONFIG['target_os'] == 'mingw32' || Config::CONFIG['target_os'] =~ /mswin32/
|
21
21
|
$CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
|
22
22
|
elsif Config::CONFIG['target_os'] =~ /solaris/
|
23
23
|
$CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
|
@@ -29,58 +29,81 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
|
|
29
29
|
$CFLAGS << " -DIN_LIBXML"
|
30
30
|
end
|
31
31
|
|
32
|
-
|
32
|
+
if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
|
33
|
+
$CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
34
|
+
end
|
35
|
+
|
36
|
+
if Config::CONFIG['target_os'] =~ /mswin32/
|
37
|
+
lib_prefix = 'lib'
|
38
|
+
|
39
|
+
# There's no default include/lib dir on Windows. Let's just add the Ruby ones
|
40
|
+
# and fall back on the search path specified by INCLUDE and LIB environment
|
41
|
+
# variables
|
42
|
+
HEADER_DIRS = [INCLUDEDIR]
|
43
|
+
LIB_DIRS = [LIBDIR]
|
44
|
+
XML2_HEADER_DIRS = [File.join(INCLUDEDIR, "libxml2"), INCLUDEDIR]
|
45
|
+
|
46
|
+
else
|
47
|
+
lib_prefix = ''
|
48
|
+
|
49
|
+
HEADER_DIRS = [
|
50
|
+
# First search /opt/local for macports
|
51
|
+
'/opt/local/include',
|
33
52
|
|
34
|
-
|
35
|
-
|
36
|
-
'/opt/local/include',
|
53
|
+
# Then search /usr/local for people that installed from source
|
54
|
+
'/usr/local/include',
|
37
55
|
|
38
|
-
|
39
|
-
|
56
|
+
# Check the ruby install locations
|
57
|
+
INCLUDEDIR,
|
40
58
|
|
41
|
-
|
42
|
-
|
59
|
+
# Finally fall back to /usr
|
60
|
+
'/usr/include',
|
61
|
+
'/usr/include/libxml2',
|
62
|
+
]
|
43
63
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
]
|
64
|
+
LIB_DIRS = [
|
65
|
+
# First search /opt/local for macports
|
66
|
+
'/opt/local/lib',
|
48
67
|
|
49
|
-
|
50
|
-
|
51
|
-
'/opt/local/lib',
|
68
|
+
# Then search /usr/local for people that installed from source
|
69
|
+
'/usr/local/lib',
|
52
70
|
|
53
|
-
|
54
|
-
|
71
|
+
# Check the ruby install locations
|
72
|
+
LIBDIR,
|
55
73
|
|
56
|
-
|
57
|
-
|
74
|
+
# Finally fall back to /usr
|
75
|
+
'/usr/lib',
|
76
|
+
]
|
58
77
|
|
59
|
-
|
60
|
-
|
61
|
-
|
78
|
+
XML2_HEADER_DIRS = [
|
79
|
+
'/opt/local/include/libxml2',
|
80
|
+
'/usr/local/include/libxml2',
|
81
|
+
File.join(INCLUDEDIR, "libxml2")
|
82
|
+
] + HEADER_DIRS
|
83
|
+
end
|
62
84
|
|
63
85
|
dir_config('zlib', HEADER_DIRS, LIB_DIRS)
|
64
86
|
dir_config('iconv', HEADER_DIRS, LIB_DIRS)
|
65
|
-
dir_config('xml2',
|
66
|
-
'/opt/local/include/libxml2',
|
67
|
-
'/usr/local/include/libxml2',
|
68
|
-
File.join(INCLUDEDIR, "libxml2")] + HEADER_DIRS, LIB_DIRS)
|
87
|
+
dir_config('xml2', XML2_HEADER_DIRS, LIB_DIRS)
|
69
88
|
dir_config('xslt', HEADER_DIRS, LIB_DIRS)
|
70
89
|
|
71
90
|
def asplode(lib)
|
72
91
|
abort "-----\n#{lib} is missing. please visit http://nokogiri.org/tutorials/installing_nokogiri.html for help with installing dependencies.\n-----"
|
73
92
|
end
|
74
93
|
|
75
|
-
|
94
|
+
# Use this with cross compiling
|
95
|
+
# PKG_CONFIG_PATH=/Users/apatterson/git/nokogiri/tmp/cross/lib/pkgconfig/ \
|
96
|
+
# rake cross compile RUBY_CC_VERSION=1.9.1
|
97
|
+
pkg_config('libxslt') if RUBY_PLATFORM =~ /mingw/
|
98
|
+
|
76
99
|
asplode "libxml2" unless find_header('libxml/parser.h')
|
77
100
|
asplode "libxslt" unless find_header('libxslt/xslt.h')
|
78
101
|
asplode "libexslt" unless find_header('libexslt/exslt.h')
|
79
|
-
asplode "
|
80
|
-
asplode "
|
81
|
-
asplode "
|
82
|
-
asplode "libexslt" unless find_library('exslt', 'exsltFuncRegister')
|
102
|
+
asplode "libxml2" unless find_library("#{lib_prefix}xml2", 'xmlParseDoc')
|
103
|
+
asplode "libxslt" unless find_library("#{lib_prefix}xslt", 'xsltParseStylesheetDoc')
|
104
|
+
asplode "libexslt" unless find_library("#{lib_prefix}exslt", 'exsltFuncRegister')
|
83
105
|
|
106
|
+
have_func 'xmlFirstElementChild'
|
84
107
|
have_func('xmlRelaxNGSetParserStructuredErrors')
|
85
108
|
have_func('xmlRelaxNGSetParserStructuredErrors')
|
86
109
|
have_func('xmlRelaxNGSetValidStructuredErrors')
|
data/ext/nokogiri/nokogiri.h
CHANGED
data/ext/nokogiri/xml_document.c
CHANGED
@@ -46,6 +46,11 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
46
46
|
|
47
47
|
for (child = node->children ; child ; child = child->next)
|
48
48
|
recursively_remove_namespaces_from_node(child);
|
49
|
+
|
50
|
+
if (node->nsDef) {
|
51
|
+
xmlFreeNsList(node->nsDef);
|
52
|
+
node->nsDef = NULL;
|
53
|
+
}
|
49
54
|
}
|
50
55
|
|
51
56
|
/*
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#ifndef HAVE_XMLFIRSTELEMENTCHILD
|
2
|
+
#include <libxml/tree.h>
|
3
|
+
/**
|
4
|
+
* xmlFirstElementChild:
|
5
|
+
* @parent: the parent node
|
6
|
+
*
|
7
|
+
* Finds the first child node of that element which is an Element node
|
8
|
+
* Note the handling of entities references is different than in
|
9
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
10
|
+
* from entities content to entities references.
|
11
|
+
*
|
12
|
+
* Returns the first element child or NULL if not available
|
13
|
+
*/
|
14
|
+
xmlNodePtr
|
15
|
+
xmlFirstElementChild(xmlNodePtr parent) {
|
16
|
+
xmlNodePtr cur = NULL;
|
17
|
+
|
18
|
+
if (parent == NULL)
|
19
|
+
return(NULL);
|
20
|
+
switch (parent->type) {
|
21
|
+
case XML_ELEMENT_NODE:
|
22
|
+
case XML_ENTITY_NODE:
|
23
|
+
case XML_DOCUMENT_NODE:
|
24
|
+
case XML_HTML_DOCUMENT_NODE:
|
25
|
+
cur = parent->children;
|
26
|
+
break;
|
27
|
+
default:
|
28
|
+
return(NULL);
|
29
|
+
}
|
30
|
+
while (cur != NULL) {
|
31
|
+
if (cur->type == XML_ELEMENT_NODE)
|
32
|
+
return(cur);
|
33
|
+
cur = cur->next;
|
34
|
+
}
|
35
|
+
return(NULL);
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
|
39
|
+
* xmlNextElementSibling:
|
40
|
+
* @node: the current node
|
41
|
+
*
|
42
|
+
* Finds the first closest next sibling of the node which is an
|
43
|
+
* element node.
|
44
|
+
* Note the handling of entities references is different than in
|
45
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
46
|
+
* from entities content to entities references.
|
47
|
+
*
|
48
|
+
* Returns the next element sibling or NULL if not available
|
49
|
+
*/
|
50
|
+
xmlNodePtr
|
51
|
+
xmlNextElementSibling(xmlNodePtr node) {
|
52
|
+
if (node == NULL)
|
53
|
+
return(NULL);
|
54
|
+
switch (node->type) {
|
55
|
+
case XML_ELEMENT_NODE:
|
56
|
+
case XML_TEXT_NODE:
|
57
|
+
case XML_CDATA_SECTION_NODE:
|
58
|
+
case XML_ENTITY_REF_NODE:
|
59
|
+
case XML_ENTITY_NODE:
|
60
|
+
case XML_PI_NODE:
|
61
|
+
case XML_COMMENT_NODE:
|
62
|
+
case XML_DTD_NODE:
|
63
|
+
case XML_XINCLUDE_START:
|
64
|
+
case XML_XINCLUDE_END:
|
65
|
+
node = node->next;
|
66
|
+
break;
|
67
|
+
default:
|
68
|
+
return(NULL);
|
69
|
+
}
|
70
|
+
while (node != NULL) {
|
71
|
+
if (node->type == XML_ELEMENT_NODE)
|
72
|
+
return(node);
|
73
|
+
node = node->next;
|
74
|
+
}
|
75
|
+
return(NULL);
|
76
|
+
}
|
77
|
+
|
78
|
+
/**
|
79
|
+
* xmlLastElementChild:
|
80
|
+
* @parent: the parent node
|
81
|
+
*
|
82
|
+
* Finds the last child node of that element which is an Element node
|
83
|
+
* Note the handling of entities references is different than in
|
84
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
85
|
+
* from entities content to entities references.
|
86
|
+
*
|
87
|
+
* Returns the last element child or NULL if not available
|
88
|
+
*/
|
89
|
+
xmlNodePtr
|
90
|
+
xmlLastElementChild(xmlNodePtr parent) {
|
91
|
+
xmlNodePtr cur = NULL;
|
92
|
+
|
93
|
+
if (parent == NULL)
|
94
|
+
return(NULL);
|
95
|
+
switch (parent->type) {
|
96
|
+
case XML_ELEMENT_NODE:
|
97
|
+
case XML_ENTITY_NODE:
|
98
|
+
case XML_DOCUMENT_NODE:
|
99
|
+
case XML_HTML_DOCUMENT_NODE:
|
100
|
+
cur = parent->last;
|
101
|
+
break;
|
102
|
+
default:
|
103
|
+
return(NULL);
|
104
|
+
}
|
105
|
+
while (cur != NULL) {
|
106
|
+
if (cur->type == XML_ELEMENT_NODE)
|
107
|
+
return(cur);
|
108
|
+
cur = cur->prev;
|
109
|
+
}
|
110
|
+
return(NULL);
|
111
|
+
}
|
112
|
+
#endif
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#ifndef HAVE_XMLFIRSTELEMENTCHILD
|
2
|
+
|
3
|
+
#ifndef XML_LIBXML2_HACKS
|
4
|
+
#define XML_LIBXML2_HACKS
|
5
|
+
|
6
|
+
xmlNodePtr xmlFirstElementChild(xmlNodePtr parent);
|
7
|
+
xmlNodePtr xmlNextElementSibling(xmlNodePtr node);
|
8
|
+
xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
9
|
+
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#endif
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -103,7 +103,7 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
|
|
103
103
|
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
|
104
104
|
{
|
105
105
|
VALUE reparented_obj ;
|
106
|
-
xmlNodePtr reparentee, pivot, reparented ;
|
106
|
+
xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;
|
107
107
|
|
108
108
|
if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
|
109
109
|
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
|
@@ -141,6 +141,34 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
|
|
141
141
|
}
|
142
142
|
}
|
143
143
|
|
144
|
+
if (reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
|
145
|
+
/*
|
146
|
+
* libxml merges text nodes in a right-to-left fashion, meaning that if
|
147
|
+
* there are two text nodes who would be adjacent, the right (or following,
|
148
|
+
* or next) node will be merged into the left (or preceding, or previous)
|
149
|
+
* node.
|
150
|
+
*
|
151
|
+
* and by "merged" I mean the string contents will be concatenated onto the
|
152
|
+
* left node's contents, and then the node will be freed.
|
153
|
+
*
|
154
|
+
* which means that if we have a ruby object wrapped around the right node,
|
155
|
+
* its memory would be freed out from under it.
|
156
|
+
*
|
157
|
+
* so, we detect this edge case and unlink-and-root the text node before it gets
|
158
|
+
* merged. then we dup the node and insert that duplicate back into the
|
159
|
+
* document where the real node was.
|
160
|
+
*
|
161
|
+
* yes, this is totally lame.
|
162
|
+
*/
|
163
|
+
next_text = pivot->next ;
|
164
|
+
new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
|
165
|
+
|
166
|
+
xmlUnlinkNode(next_text);
|
167
|
+
NOKOGIRI_ROOT_NODE(next_text);
|
168
|
+
|
169
|
+
xmlAddNextSibling(pivot, new_next_text);
|
170
|
+
}
|
171
|
+
|
144
172
|
/* TODO: I really want to remove this. We shouldn't support 2.6.16 anymore */
|
145
173
|
if ( reparentee->type == XML_TEXT_NODE && pivot->type == XML_TEXT_NODE && is_2_6_16() ) {
|
146
174
|
/* work around a string-handling bug in libxml 2.6.16. we'd rather leak than segfault. */
|
@@ -1021,26 +1049,32 @@ static VALUE line(VALUE self)
|
|
1021
1049
|
*/
|
1022
1050
|
static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
|
1023
1051
|
{
|
1024
|
-
xmlNodePtr node;
|
1052
|
+
xmlNodePtr node, namespacee;
|
1025
1053
|
xmlNsPtr ns;
|
1026
1054
|
|
1027
1055
|
Data_Get_Struct(self, xmlNode, node);
|
1056
|
+
namespacee = node ;
|
1028
1057
|
|
1029
|
-
ns =
|
1058
|
+
ns = xmlSearchNs(
|
1059
|
+
node->doc,
|
1030
1060
|
node,
|
1031
|
-
(const xmlChar *)StringValuePtr(href),
|
1032
1061
|
(const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
|
1033
1062
|
);
|
1034
1063
|
|
1035
1064
|
if(!ns) {
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1065
|
+
if (node->type != XML_ELEMENT_NODE) {
|
1066
|
+
namespacee = node->parent;
|
1067
|
+
}
|
1068
|
+
ns = xmlNewNs(
|
1069
|
+
namespacee,
|
1070
|
+
(const xmlChar *)StringValuePtr(href),
|
1039
1071
|
(const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
|
1040
1072
|
);
|
1041
1073
|
}
|
1042
1074
|
|
1043
|
-
if(
|
1075
|
+
if (!ns) return Qnil ;
|
1076
|
+
|
1077
|
+
if(NIL_P(prefix) || node != namespacee) xmlSetNs(node, ns);
|
1044
1078
|
|
1045
1079
|
return Nokogiri_wrap_xml_namespace(node->doc, ns);
|
1046
1080
|
}
|
@@ -1122,13 +1156,11 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1122
1156
|
xmlNodePtr node;
|
1123
1157
|
xmlNodePtr list;
|
1124
1158
|
xmlNodeSetPtr set;
|
1159
|
+
xmlParserErrors error;
|
1125
1160
|
VALUE doc, err;
|
1126
1161
|
|
1127
1162
|
Data_Get_Struct(self, xmlNode, node);
|
1128
1163
|
|
1129
|
-
if(!node->parent)
|
1130
|
-
rb_raise(rb_eRuntimeError, "no contextual parsing on unlinked nodes");
|
1131
|
-
|
1132
1164
|
doc = DOC_RUBY_OBJECT(node->doc);
|
1133
1165
|
err = rb_iv_get(doc, "@errors");
|
1134
1166
|
|
@@ -1141,7 +1173,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1141
1173
|
htmlHandleOmittedElem(0);
|
1142
1174
|
#endif
|
1143
1175
|
|
1144
|
-
xmlParseInNodeContext(
|
1176
|
+
error = xmlParseInNodeContext(
|
1145
1177
|
node,
|
1146
1178
|
StringValuePtr(_str),
|
1147
1179
|
(int)RSTRING_LEN(_str),
|
@@ -1154,6 +1186,20 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1154
1186
|
|
1155
1187
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
1156
1188
|
|
1189
|
+
/* FIXME: This probably needs to handle more constants... */
|
1190
|
+
switch(error) {
|
1191
|
+
case XML_ERR_OK:
|
1192
|
+
break;
|
1193
|
+
|
1194
|
+
case XML_ERR_INTERNAL_ERROR:
|
1195
|
+
case XML_ERR_NO_MEMORY:
|
1196
|
+
rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
|
1197
|
+
break;
|
1198
|
+
|
1199
|
+
default:
|
1200
|
+
break;
|
1201
|
+
}
|
1202
|
+
|
1157
1203
|
set = xmlXPathNodeSetCreate(NULL);
|
1158
1204
|
|
1159
1205
|
while(list) {
|