nokogiri 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (54) hide show
  1. data/CHANGELOG.ja.rdoc +28 -8
  2. data/CHANGELOG.rdoc +24 -1
  3. data/Manifest.txt +2 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +22 -4
  6. data/Rakefile +6 -2
  7. data/ext/nokogiri/extconf.rb +55 -32
  8. data/ext/nokogiri/nokogiri.h +2 -0
  9. data/ext/nokogiri/xml_document.c +5 -0
  10. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  11. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  12. data/ext/nokogiri/xml_node.c +58 -12
  13. data/ext/nokogiri/xml_node_set.c +7 -7
  14. data/ext/nokogiri/xml_reader.c +20 -1
  15. data/ext/nokogiri/xml_xpath_context.c +2 -0
  16. data/lib/nokogiri/css/generated_parser.rb +155 -148
  17. data/lib/nokogiri/css/generated_tokenizer.rb +2 -1
  18. data/lib/nokogiri/css/parser.y +3 -0
  19. data/lib/nokogiri/css/xpath_visitor.rb +1 -7
  20. data/lib/nokogiri/ffi/libxml.rb +29 -4
  21. data/lib/nokogiri/ffi/xml/document.rb +4 -0
  22. data/lib/nokogiri/ffi/xml/node.rb +27 -19
  23. data/lib/nokogiri/ffi/xml/node_set.rb +3 -3
  24. data/lib/nokogiri/ffi/xml/reader.rb +4 -0
  25. data/lib/nokogiri/html.rb +2 -2
  26. data/lib/nokogiri/html/document_fragment.rb +7 -4
  27. data/lib/nokogiri/version.rb +2 -1
  28. data/lib/nokogiri/xml/builder.rb +1 -1
  29. data/lib/nokogiri/xml/document.rb +1 -2
  30. data/lib/nokogiri/xml/document_fragment.rb +7 -0
  31. data/lib/nokogiri/xml/node.rb +4 -2
  32. data/lib/nokogiri/xml/node_set.rb +25 -0
  33. data/lib/nokogiri/xml/reader.rb +2 -0
  34. data/lib/nokogiri/xml/sax/document.rb +3 -1
  35. data/test/css/test_parser.rb +11 -1
  36. data/test/html/sax/test_parser_context.rb +2 -2
  37. data/test/html/test_document.rb +2 -2
  38. data/test/html/test_document_fragment.rb +34 -6
  39. data/test/test_memory_leak.rb +2 -2
  40. data/test/test_reader.rb +28 -6
  41. data/test/test_xslt_transforms.rb +29 -28
  42. data/test/xml/test_attr.rb +31 -4
  43. data/test/xml/test_builder.rb +5 -5
  44. data/test/xml/test_cdata.rb +3 -3
  45. data/test/xml/test_document.rb +8 -8
  46. data/test/xml/test_document_fragment.rb +2 -2
  47. data/test/xml/test_node.rb +1 -1
  48. data/test/xml/test_node_reparenting.rb +26 -11
  49. data/test/xml/test_node_set.rb +38 -2
  50. data/test/xml/test_text.rb +11 -2
  51. data/test/xml/test_unparented_node.rb +1 -1
  52. data/test/xml/test_xpath.rb +78 -11
  53. metadata +24 -5
  54. data/lib/nokogiri/version_warning.rb +0 -14
@@ -1,8 +1,28 @@
1
- === 1.4.2
1
+ === 1.4.3 2010年7月28日
2
2
 
3
3
  * 新しい機能
4
4
 
5
- * XML::Node#parse 定義されたコンテキストノードで、XML 又はHTMLのフレグメント
5
+ * XML::Reader#empty_element? - 子の無いエレメントにtrueを返す  #262
6
+ * Node#remove_namespaces! - 1.4.2では 名前空間のみを取り除いていたが、
7
+ 1.4.3 では名前空間及び、名前空間宣言も取り除く #294
8
+
9
+ * バグの修正
10
+
11
+ * XML::NodeSet#{include?,delete,push} はXML::Namespaceを受入れる
12
+ * XML::Document#parse - 1.4.3より文書内の文脈を解析する機能を追加
13
+ * XML::DocumentFragment#inner_html= 文脈解析を共に実行する #298, #281
14
+ * lib/nokogiri/css/parser.y はCSSと疑似選別の両方を機能
15
+ * 演算によって近隣に存在する併合型ノードへの遊離問題の有無に関わらず、一切の
16
+ 弊害なしにテキストノードの繰り返しが実行可能  #283
17
+ * xmlFirstElementChild et al.による libxml2バージョンでの不適合性を修正 #303
18
+ * XML::Attr#add_namespace (!)文字通りの機能実現!  #252
19
+ * HTML::DocumentFragment が文字列に存在するエンコードを使用 #305
20
+
21
+ === 1.4.2 2010年5月22日
22
+
23
+ * 新機能
24
+
25
+ * XML::Node#parse 定義されたコンテキストノードでXML 又はHTMLのフラグメント
6
26
  を解析する
7
27
  * XML::Node#namespacesが子ノードとその祖先ノード内で定義された全ての名前空間
8
28
  を返すようになった(以前は祖先ノードの名前空間は返されなかった)
@@ -30,18 +50,18 @@
30
50
  * XML::Node#xpath はNodeSetのオブジェクト以外のオブジェクトを返す GH #208
31
51
  * XSLT::StyleSheet#transformはパラメーターのハッシュを受け入れる GH #223
32
52
  * CSSのnot()の疑似セレクタの修正  GH #205
33
- * XML::Builder はノード達が切り離されても破壊しない(vihaiの協力に感謝)
53
+ * XML::Builder はノードらが切り離されても破壊しない(vihaiの協力に感謝)
34
54
  GH #228
35
55
  * SAX parser経由でエンコードを強制することが出来る  Eugene Pimenovに感謝!
36
56
  GH #204
37
- * XML::DocumentFragment はML::Node#parse を使用して子供を限定する
38
- * XML Reader内のメモリリークを修正  sdorさん、ありがとう! GH#244
57
+ * XML::DocumentFragment はXML::Node#parse を使用して子を限定する
58
+ * XML Reader内のメモリリーク修正  sdorさん、ありがとう! GH#244
39
59
 
40
60
  * ノート
41
61
 
42
- * 今日4月18日現在、Windows gems は libxml 2.7.6 とlibxslt
43
- 1.1.26にDLLsを正規装備しています。このリリース以前にも既にDLLsはlibxml
44
- 2.7.3 と libxslt 1.1.24に正規装備されています。
62
+ * 今日4月18日現在、Windows gems は libxml 2.7.7 とlibxslt
63
+ 1.1.26にDLLsを標準装備しています。このリリース以前にも既にDLLsはlibxml
64
+ 2.7.3 と libxslt 1.1.24に標準装備済み。
45
65
 
46
66
  === 1.4.1 2009年12月10日
47
67
 
@@ -1,4 +1,27 @@
1
- === 1.4.2 (TBA)
1
+ === 1.4.3 / 2010/07/28
2
+
3
+ * New Features
4
+
5
+ * XML::Reader#empty_element? returns true for empty elements. #262
6
+ * Node#remove_namespaces! now removes namespace *declarations* as well. #294
7
+ * NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding
8
+ methods of Node do.
9
+
10
+ * Bugfixes
11
+
12
+ * XML::NodeSet#{include?,delete,push} accept an XML::Namespace
13
+ * XML::Document#parse added for parsing in the context of a document
14
+ * XML::DocumentFragment#inner_html= works with contextual parsing! #298, #281
15
+ * lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed
16
+ * Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. #283
17
+ * Fixed libxml2 versionitis issue with xmlFirstElementChild et al. #303
18
+ * XML::Attr#add_namespace now works as expected. #252
19
+ * HTML::DocumentFragment uses the string's encoding. #305
20
+ * Fix the CSS3 selector translation rule for the general sibling combinator
21
+ (a.k.a. preceding selector) that incorrectly converted "E ~ F G" to
22
+ "//F//G[preceding-sibling::E]".
23
+
24
+ === 1.4.2 / 2010/05/22
2
25
 
3
26
  * New Features
4
27
 
@@ -44,6 +44,8 @@ ext/nokogiri/xml_entity_reference.c
44
44
  ext/nokogiri/xml_entity_reference.h
45
45
  ext/nokogiri/xml_io.c
46
46
  ext/nokogiri/xml_io.h
47
+ ext/nokogiri/xml_libxml2_hacks.c
48
+ ext/nokogiri/xml_libxml2_hacks.h
47
49
  ext/nokogiri/xml_namespace.c
48
50
  ext/nokogiri/xml_namespace.h
49
51
  ext/nokogiri/xml_node.c
@@ -157,7 +159,6 @@ lib/nokogiri/html/sax/parser.rb
157
159
  lib/nokogiri/html/sax/parser_context.rb
158
160
  lib/nokogiri/syntax_error.rb
159
161
  lib/nokogiri/version.rb
160
- lib/nokogiri/version_warning.rb
161
162
  lib/nokogiri/xml.rb
162
163
  lib/nokogiri/xml/attr.rb
163
164
  lib/nokogiri/xml/attribute_decl.rb
@@ -3,7 +3,7 @@
3
3
  * http://nokogiri.org/
4
4
  * http://github.com/tenderlove/nokogiri/wikis
5
5
  * http://github.com/tenderlove/nokogiri/tree/master
6
- * http://groups.google.com/group/nokogiri-talk
6
+ * http://groups.google.com/group/nokogiri-list
7
7
  * http://github.com/tenderlove/nokogiri/issues
8
8
 
9
9
  == DESCRIPTION:
@@ -81,9 +81,27 @@ The IRC channel is #nokogiri on freenode.
81
81
 
82
82
  == ENCODING:
83
83
 
84
- Strings are always stored as UTF-8 internally. Methods that return text values
85
- will always return UTF-8 encoded strings. Methods that return XML (like to_xml,
86
- to_html and inner_html) will return a string encoded like the source document.
84
+ Strings are always stored as UTF-8 internally. Methods that return
85
+ text values will always return UTF-8 encoded strings. Methods that
86
+ return XML (like to_xml, to_html and inner_html) will return a string
87
+ encoded like the source document.
88
+
89
+ *WARNING*
90
+
91
+ Some documents declare one particular encoding, but use a different
92
+ one. So, which encoding should the parser choose?
93
+
94
+ Remember that data is just a stream of bytes. Only we humans add
95
+ meaning to that stream. Any particular set of bytes could be valid
96
+ characters in multiple encodings, so detecting encoding with 100%
97
+ accuracy is not possible. libxml2 does its best, but it can't be right
98
+ 100% of the time.
99
+
100
+ If you want Nokogiri to handle the document encoding properly, your
101
+ best bet is to explicitly set the encoding. Here is an example of
102
+ explicitly setting the encoding to EUC-JP on the parser:
103
+
104
+ doc = Nokogiri.XML('<foo><bar /></foo>', nil, 'EUC-JP')
87
105
 
88
106
  == INSTALL:
89
107
 
@@ -94,7 +112,7 @@ to_html and inner_html) will return a string encoded like the source document.
94
112
  Binary packages are available for:
95
113
 
96
114
  * SuSE[http://download.opensuse.org/repositories/devel:/languages:/ruby:/extensions/]
97
- * Fedora[https://admin.fedoraproject.org/pkgdb/packages/name/rubygem-nokogiri]
115
+ * Fedora[http://s390.koji.fedoraproject.org/koji/packageinfo?packageID=6756]
98
116
 
99
117
  == DEVELOPMENT:
100
118
 
data/Rakefile CHANGED
@@ -112,7 +112,7 @@ end
112
112
 
113
113
  file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
114
114
  begin
115
- racc = `which racc`.strip
115
+ racc = Config::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
116
116
  racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
117
117
  sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
118
118
  rescue
@@ -129,7 +129,11 @@ file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
129
129
  end
130
130
 
131
131
  require 'tasks/test'
132
- require 'tasks/cross_compile'
132
+ begin
133
+ require 'tasks/cross_compile' unless java
134
+ rescue RuntimeError => e
135
+ warn "WARNING: Could not perform some cross-compiling: #{e}"
136
+ end
133
137
 
134
138
  desc "set environment variables to build and/or test with debug options"
135
139
  task :debug do
@@ -17,7 +17,7 @@ end
17
17
  $CFLAGS << " #{ENV["CFLAGS"]}"
18
18
  $LIBS << " #{ENV["LIBS"]}"
19
19
 
20
- if Config::CONFIG['target_os'] == 'mingw32'
20
+ if Config::CONFIG['target_os'] == 'mingw32' || Config::CONFIG['target_os'] =~ /mswin32/
21
21
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
22
22
  elsif Config::CONFIG['target_os'] =~ /solaris/
23
23
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
@@ -29,58 +29,81 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
29
29
  $CFLAGS << " -DIN_LIBXML"
30
30
  end
31
31
 
32
- $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
32
+ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
33
+ $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
34
+ end
35
+
36
+ if Config::CONFIG['target_os'] =~ /mswin32/
37
+ lib_prefix = 'lib'
38
+
39
+ # There's no default include/lib dir on Windows. Let's just add the Ruby ones
40
+ # and rely on the search path specified by the INCLUDE and LIB environment
41
+ # variables
42
+ HEADER_DIRS = [INCLUDEDIR]
43
+ LIB_DIRS = [LIBDIR]
44
+ XML2_HEADER_DIRS = [File.join(INCLUDEDIR, "libxml2"), INCLUDEDIR]
45
+
46
+ else
47
+ lib_prefix = ''
48
+
49
+ HEADER_DIRS = [
50
+ # First search /opt/local for macports
51
+ '/opt/local/include',
33
52
 
34
- HEADER_DIRS = [
35
- # First search /opt/local for macports
36
- '/opt/local/include',
53
+ # Then search /usr/local for people that installed from source
54
+ '/usr/local/include',
37
55
 
38
- # Then search /usr/local for people that installed from source
39
- '/usr/local/include',
56
+ # Check the ruby install locations
57
+ INCLUDEDIR,
40
58
 
41
- # Check the ruby install locations
42
- INCLUDEDIR,
59
+ # Finally fall back to /usr
60
+ '/usr/include',
61
+ '/usr/include/libxml2',
62
+ ]
43
63
 
44
- # Finally fall back to /usr
45
- '/usr/include',
46
- '/usr/include/libxml2',
47
- ]
64
+ LIB_DIRS = [
65
+ # First search /opt/local for macports
66
+ '/opt/local/lib',
48
67
 
49
- LIB_DIRS = [
50
- # First search /opt/local for macports
51
- '/opt/local/lib',
68
+ # Then search /usr/local for people that installed from source
69
+ '/usr/local/lib',
52
70
 
53
- # Then search /usr/local for people that installed from source
54
- '/usr/local/lib',
71
+ # Check the ruby install locations
72
+ LIBDIR,
55
73
 
56
- # Check the ruby install locations
57
- LIBDIR,
74
+ # Finally fall back to /usr
75
+ '/usr/lib',
76
+ ]
58
77
 
59
- # Finally fall back to /usr
60
- '/usr/lib',
61
- ]
78
+ XML2_HEADER_DIRS = [
79
+ '/opt/local/include/libxml2',
80
+ '/usr/local/include/libxml2',
81
+ File.join(INCLUDEDIR, "libxml2")
82
+ ] + HEADER_DIRS
83
+ end
62
84
 
63
85
  dir_config('zlib', HEADER_DIRS, LIB_DIRS)
64
86
  dir_config('iconv', HEADER_DIRS, LIB_DIRS)
65
- dir_config('xml2', [
66
- '/opt/local/include/libxml2',
67
- '/usr/local/include/libxml2',
68
- File.join(INCLUDEDIR, "libxml2")] + HEADER_DIRS, LIB_DIRS)
87
+ dir_config('xml2', XML2_HEADER_DIRS, LIB_DIRS)
69
88
  dir_config('xslt', HEADER_DIRS, LIB_DIRS)
70
89
 
71
90
  def asplode(lib)
72
91
  abort "-----\n#{lib} is missing. please visit http://nokogiri.org/tutorials/installing_nokogiri.html for help with installing dependencies.\n-----"
73
92
  end
74
93
 
75
- asplode "iconv" unless find_header('iconv.h')
94
+ # Use this with cross compiling
95
+ # PKG_CONFIG_PATH=/Users/apatterson/git/nokogiri/tmp/cross/lib/pkgconfig/ \
96
+ # rake cross compile RUBY_CC_VERSION=1.9.1
97
+ pkg_config('libxslt') if RUBY_PLATFORM =~ /mingw/
98
+
76
99
  asplode "libxml2" unless find_header('libxml/parser.h')
77
100
  asplode "libxslt" unless find_header('libxslt/xslt.h')
78
101
  asplode "libexslt" unless find_header('libexslt/exslt.h')
79
- asplode "zlib" unless find_library('z', 'gzopen')
80
- asplode "libxml2" unless find_library('xml2', 'xmlParseDoc')
81
- asplode "libxslt" unless find_library('xslt', 'xsltParseStylesheetDoc')
82
- asplode "libexslt" unless find_library('exslt', 'exsltFuncRegister')
102
+ asplode "libxml2" unless find_library("#{lib_prefix}xml2", 'xmlParseDoc')
103
+ asplode "libxslt" unless find_library("#{lib_prefix}xslt", 'xsltParseStylesheetDoc')
104
+ asplode "libexslt" unless find_library("#{lib_prefix}exslt", 'exsltFuncRegister')
83
105
 
106
+ have_func 'xmlFirstElementChild'
84
107
  have_func('xmlRelaxNGSetParserStructuredErrors')
85
108
  have_func('xmlRelaxNGSetParserStructuredErrors')
86
109
  have_func('xmlRelaxNGSetValidStructuredErrors')
@@ -77,6 +77,8 @@ int is_2_6_16(void) ;
77
77
  #define RBSTR_OR_QNIL(_str) \
78
78
  (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
79
79
 
80
+ #include <xml_libxml2_hacks.h>
81
+
80
82
  #include <xml_io.h>
81
83
  #include <xml_document.h>
82
84
  #include <html_entity_lookup.h>
@@ -46,6 +46,11 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
46
46
 
47
47
  for (child = node->children ; child ; child = child->next)
48
48
  recursively_remove_namespaces_from_node(child);
49
+
50
+ if (node->nsDef) {
51
+ xmlFreeNsList(node->nsDef);
52
+ node->nsDef = NULL;
53
+ }
49
54
  }
50
55
 
51
56
  /*
@@ -0,0 +1,112 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <libxml/tree.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is an Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent) {
16
+ xmlNodePtr cur = NULL;
17
+
18
+ if (parent == NULL)
19
+ return(NULL);
20
+ switch (parent->type) {
21
+ case XML_ELEMENT_NODE:
22
+ case XML_ENTITY_NODE:
23
+ case XML_DOCUMENT_NODE:
24
+ case XML_HTML_DOCUMENT_NODE:
25
+ cur = parent->children;
26
+ break;
27
+ default:
28
+ return(NULL);
29
+ }
30
+ while (cur != NULL) {
31
+ if (cur->type == XML_ELEMENT_NODE)
32
+ return(cur);
33
+ cur = cur->next;
34
+ }
35
+ return(NULL);
36
+ }
37
+
38
+ /**
39
+ * xmlNextElementSibling:
40
+ * @node: the current node
41
+ *
42
+ * Finds the first closest next sibling of the node which is an
43
+ * element node.
44
+ * Note the handling of entities references is different than in
45
+ * the W3C DOM element traversal spec since we don't have back reference
46
+ * from entities content to entities references.
47
+ *
48
+ * Returns the next element sibling or NULL if not available
49
+ */
50
+ xmlNodePtr
51
+ xmlNextElementSibling(xmlNodePtr node) {
52
+ if (node == NULL)
53
+ return(NULL);
54
+ switch (node->type) {
55
+ case XML_ELEMENT_NODE:
56
+ case XML_TEXT_NODE:
57
+ case XML_CDATA_SECTION_NODE:
58
+ case XML_ENTITY_REF_NODE:
59
+ case XML_ENTITY_NODE:
60
+ case XML_PI_NODE:
61
+ case XML_COMMENT_NODE:
62
+ case XML_DTD_NODE:
63
+ case XML_XINCLUDE_START:
64
+ case XML_XINCLUDE_END:
65
+ node = node->next;
66
+ break;
67
+ default:
68
+ return(NULL);
69
+ }
70
+ while (node != NULL) {
71
+ if (node->type == XML_ELEMENT_NODE)
72
+ return(node);
73
+ node = node->next;
74
+ }
75
+ return(NULL);
76
+ }
77
+
78
+ /**
79
+ * xmlLastElementChild:
80
+ * @parent: the parent node
81
+ *
82
+ * Finds the last child node of that element which is an Element node
83
+ * Note the handling of entities references is different than in
84
+ * the W3C DOM element traversal spec since we don't have back reference
85
+ * from entities content to entities references.
86
+ *
87
+ * Returns the last element child or NULL if not available
88
+ */
89
+ xmlNodePtr
90
+ xmlLastElementChild(xmlNodePtr parent) {
91
+ xmlNodePtr cur = NULL;
92
+
93
+ if (parent == NULL)
94
+ return(NULL);
95
+ switch (parent->type) {
96
+ case XML_ELEMENT_NODE:
97
+ case XML_ENTITY_NODE:
98
+ case XML_DOCUMENT_NODE:
99
+ case XML_HTML_DOCUMENT_NODE:
100
+ cur = parent->last;
101
+ break;
102
+ default:
103
+ return(NULL);
104
+ }
105
+ while (cur != NULL) {
106
+ if (cur->type == XML_ELEMENT_NODE)
107
+ return(cur);
108
+ cur = cur->prev;
109
+ }
110
+ return(NULL);
111
+ }
112
+ #endif
@@ -0,0 +1,12 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+
3
+ #ifndef XML_LIBXML2_HACKS
4
+ #define XML_LIBXML2_HACKS
5
+
6
+ xmlNodePtr xmlFirstElementChild(xmlNodePtr parent);
7
+ xmlNodePtr xmlNextElementSibling(xmlNodePtr node);
8
+ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
9
+
10
+ #endif
11
+
12
+ #endif
@@ -103,7 +103,7 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
103
103
  static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
104
104
  {
105
105
  VALUE reparented_obj ;
106
- xmlNodePtr reparentee, pivot, reparented ;
106
+ xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;
107
107
 
108
108
  if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
109
109
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
@@ -141,6 +141,34 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
141
141
  }
142
142
  }
143
143
 
144
+ if (reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
145
+ /*
146
+ * libxml merges text nodes in a right-to-left fashion, meaning that if
147
+ * there are two text nodes who would be adjacent, the right (or following,
148
+ * or next) node will be merged into the left (or preceding, or previous)
149
+ * node.
150
+ *
151
+ * and by "merged" I mean the string contents will be concatenated onto the
152
+ * left node's contents, and then the node will be freed.
153
+ *
154
+ * which means that if we have a ruby object wrapped around the right node,
155
+ * its memory would be freed out from under it.
156
+ *
157
+ * so, we detect this edge case and unlink-and-root the text node before it gets
158
+ * merged. then we dup the node and insert that duplicate back into the
159
+ * document where the real node was.
160
+ *
161
+ * yes, this is totally lame.
162
+ */
163
+ next_text = pivot->next ;
164
+ new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;
165
+
166
+ xmlUnlinkNode(next_text);
167
+ NOKOGIRI_ROOT_NODE(next_text);
168
+
169
+ xmlAddNextSibling(pivot, new_next_text);
170
+ }
171
+
144
172
  /* TODO: I really want to remove this. We shouldn't support 2.6.16 anymore */
145
173
  if ( reparentee->type == XML_TEXT_NODE && pivot->type == XML_TEXT_NODE && is_2_6_16() ) {
146
174
  /* work around a string-handling bug in libxml 2.6.16. we'd rather leak than segfault. */
@@ -1021,26 +1049,32 @@ static VALUE line(VALUE self)
1021
1049
  */
1022
1050
  static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
1023
1051
  {
1024
- xmlNodePtr node;
1052
+ xmlNodePtr node, namespacee;
1025
1053
  xmlNsPtr ns;
1026
1054
 
1027
1055
  Data_Get_Struct(self, xmlNode, node);
1056
+ namespacee = node ;
1028
1057
 
1029
- ns = xmlNewNs(
1058
+ ns = xmlSearchNs(
1059
+ node->doc,
1030
1060
  node,
1031
- (const xmlChar *)StringValuePtr(href),
1032
1061
  (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
1033
1062
  );
1034
1063
 
1035
1064
  if(!ns) {
1036
- ns = xmlSearchNs(
1037
- node->doc,
1038
- node,
1065
+ if (node->type != XML_ELEMENT_NODE) {
1066
+ namespacee = node->parent;
1067
+ }
1068
+ ns = xmlNewNs(
1069
+ namespacee,
1070
+ (const xmlChar *)StringValuePtr(href),
1039
1071
  (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
1040
1072
  );
1041
1073
  }
1042
1074
 
1043
- if(NIL_P(prefix)) xmlSetNs(node, ns);
1075
+ if (!ns) return Qnil ;
1076
+
1077
+ if(NIL_P(prefix) || node != namespacee) xmlSetNs(node, ns);
1044
1078
 
1045
1079
  return Nokogiri_wrap_xml_namespace(node->doc, ns);
1046
1080
  }
@@ -1122,13 +1156,11 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1122
1156
  xmlNodePtr node;
1123
1157
  xmlNodePtr list;
1124
1158
  xmlNodeSetPtr set;
1159
+ xmlParserErrors error;
1125
1160
  VALUE doc, err;
1126
1161
 
1127
1162
  Data_Get_Struct(self, xmlNode, node);
1128
1163
 
1129
- if(!node->parent)
1130
- rb_raise(rb_eRuntimeError, "no contextual parsing on unlinked nodes");
1131
-
1132
1164
  doc = DOC_RUBY_OBJECT(node->doc);
1133
1165
  err = rb_iv_get(doc, "@errors");
1134
1166
 
@@ -1141,7 +1173,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1141
1173
  htmlHandleOmittedElem(0);
1142
1174
  #endif
1143
1175
 
1144
- xmlParseInNodeContext(
1176
+ error = xmlParseInNodeContext(
1145
1177
  node,
1146
1178
  StringValuePtr(_str),
1147
1179
  (int)RSTRING_LEN(_str),
@@ -1154,6 +1186,20 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1154
1186
 
1155
1187
  xmlSetStructuredErrorFunc(NULL, NULL);
1156
1188
 
1189
+ /* FIXME: This probably needs to handle more constants... */
1190
+ switch(error) {
1191
+ case XML_ERR_OK:
1192
+ break;
1193
+
1194
+ case XML_ERR_INTERNAL_ERROR:
1195
+ case XML_ERR_NO_MEMORY:
1196
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1197
+ break;
1198
+
1199
+ default:
1200
+ break;
1201
+ }
1202
+
1157
1203
  set = xmlXPathNodeSetCreate(NULL);
1158
1204
 
1159
1205
  while(list) {