nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,79 +1,112 @@
1
- #include <xml_encoding_handler.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriEncodingHandler;
4
+
5
+ static void
6
+ xml_encoding_handler_dealloc(void *data)
7
+ {
8
+ /* make sure iconv handlers are cleaned up and freed */
9
+ xmlCharEncodingHandlerPtr c_handler = data;
10
+ xmlCharEncCloseFunc(c_handler);
11
+ }
12
+
13
+ static const rb_data_type_t xml_char_encoding_handler_type = {
14
+ .wrap_struct_name = "xmlCharEncodingHandler",
15
+ .function = {
16
+ .dfree = xml_encoding_handler_dealloc,
17
+ },
18
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
19
+ };
20
+
2
21
 
3
22
  /*
4
23
  * call-seq: Nokogiri::EncodingHandler.[](name)
5
24
  *
6
25
  * Get the encoding handler for +name+
7
26
  */
8
- static VALUE get(VALUE klass, VALUE key)
27
+ static VALUE
28
+ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
9
29
  {
10
30
  xmlCharEncodingHandlerPtr handler;
11
31
 
12
32
  handler = xmlFindCharEncodingHandler(StringValueCStr(key));
13
- if(handler)
14
- return Data_Wrap_Struct(klass, NULL, NULL, handler);
33
+ if (handler) {
34
+ return TypedData_Wrap_Struct(klass, &xml_char_encoding_handler_type, handler);
35
+ }
15
36
 
16
37
  return Qnil;
17
38
  }
18
39
 
40
+
19
41
  /*
20
42
  * call-seq: Nokogiri::EncodingHandler.delete(name)
21
43
  *
22
44
  * Delete the encoding alias named +name+
23
45
  */
24
- static VALUE delete(VALUE klass, VALUE name)
46
+ static VALUE
47
+ rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name)
25
48
  {
26
- if(xmlDelEncodingAlias(StringValueCStr(name))) return Qnil;
49
+ if (xmlDelEncodingAlias(StringValueCStr(name))) { return Qnil; }
27
50
 
28
51
  return Qtrue;
29
52
  }
30
53
 
54
+
31
55
  /*
32
- * call-seq: Nokogiri::EncodingHandler.alias(from, to)
56
+ * call-seq: Nokogiri::EncodingHandler.alias(real_name, alias_name)
33
57
  *
34
- * Alias encoding handler with name +from+ to name +to+
58
+ * Alias encoding handler with name +real_name+ to name +alias_name+
35
59
  */
36
- static VALUE alias(VALUE klass, VALUE from, VALUE to)
60
+ static VALUE
61
+ rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to)
37
62
  {
38
63
  xmlAddEncodingAlias(StringValueCStr(from), StringValueCStr(to));
39
64
 
40
65
  return to;
41
66
  }
42
67
 
68
+
43
69
  /*
44
70
  * call-seq: Nokogiri::EncodingHandler.clear_aliases!
45
71
  *
46
72
  * Remove all encoding aliases.
47
73
  */
48
- static VALUE clear_aliases(VALUE klass)
74
+ static VALUE
75
+ rb_xml_encoding_handler_s_clear_aliases(VALUE klass)
49
76
  {
50
77
  xmlCleanupEncodingAliases();
51
78
 
52
79
  return klass;
53
80
  }
54
81
 
82
+
55
83
  /*
56
84
  * call-seq: name
57
85
  *
58
86
  * Get the name of this EncodingHandler
59
87
  */
60
- static VALUE name(VALUE self)
88
+ static VALUE
89
+ rb_xml_encoding_handler_name(VALUE self)
61
90
  {
62
91
  xmlCharEncodingHandlerPtr handler;
63
92
 
64
- Data_Get_Struct(self, xmlCharEncodingHandler, handler);
93
+ TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_char_encoding_handler_type, handler);
65
94
 
66
95
  return NOKOGIRI_STR_NEW2(handler->name);
67
96
  }
68
97
 
69
- void init_xml_encoding_handler()
98
+
99
+ void
100
+ noko_init_xml_encoding_handler(void)
70
101
  {
71
- VALUE nokogiri = rb_define_module("Nokogiri");
72
- VALUE klass = rb_define_class_under(nokogiri, "EncodingHandler", rb_cObject);
73
-
74
- rb_define_singleton_method(klass, "[]", get, 1);
75
- rb_define_singleton_method(klass, "delete", delete, 1);
76
- rb_define_singleton_method(klass, "alias", alias, 2);
77
- rb_define_singleton_method(klass, "clear_aliases!", clear_aliases, 0);
78
- rb_define_method(klass, "name", name, 0);
102
+ cNokogiriEncodingHandler = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject);
103
+
104
+ rb_undef_alloc_func(cNokogiriEncodingHandler);
105
+
106
+ rb_define_singleton_method(cNokogiriEncodingHandler, "[]", rb_xml_encoding_handler_s_get, 1);
107
+ rb_define_singleton_method(cNokogiriEncodingHandler, "delete", rb_xml_encoding_handler_s_delete, 1);
108
+ rb_define_singleton_method(cNokogiriEncodingHandler, "alias", rb_xml_encoding_handler_s_alias, 2);
109
+ rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", rb_xml_encoding_handler_s_clear_aliases, 0);
110
+
111
+ rb_define_method(cNokogiriEncodingHandler, "name", rb_xml_encoding_handler_name, 0);
79
112
  }
@@ -1,4 +1,6 @@
1
- #include <xml_entity_decl.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlEntityDecl;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,12 +8,13 @@
6
8
  *
7
9
  * Get the original_content before ref substitution
8
10
  */
9
- static VALUE original_content(VALUE self)
11
+ static VALUE
12
+ original_content(VALUE self)
10
13
  {
11
14
  xmlEntityPtr node;
12
- Data_Get_Struct(self, xmlEntity, node);
15
+ Noko_Node_Get_Struct(self, xmlEntity, node);
13
16
 
14
- if(!node->orig) return Qnil;
17
+ if (!node->orig) { return Qnil; }
15
18
 
16
19
  return NOKOGIRI_STR_NEW2(node->orig);
17
20
  }
@@ -22,12 +25,13 @@ static VALUE original_content(VALUE self)
22
25
  *
23
26
  * Get the content
24
27
  */
25
- static VALUE get_content(VALUE self)
28
+ static VALUE
29
+ get_content(VALUE self)
26
30
  {
27
31
  xmlEntityPtr node;
28
- Data_Get_Struct(self, xmlEntity, node);
32
+ Noko_Node_Get_Struct(self, xmlEntity, node);
29
33
 
30
- if(!node->content) return Qnil;
34
+ if (!node->content) { return Qnil; }
31
35
 
32
36
  return NOKOGIRI_STR_NEW(node->content, node->length);
33
37
  }
@@ -38,10 +42,11 @@ static VALUE get_content(VALUE self)
38
42
  *
39
43
  * Get the entity type
40
44
  */
41
- static VALUE entity_type(VALUE self)
45
+ static VALUE
46
+ entity_type(VALUE self)
42
47
  {
43
48
  xmlEntityPtr node;
44
- Data_Get_Struct(self, xmlEntity, node);
49
+ Noko_Node_Get_Struct(self, xmlEntity, node);
45
50
 
46
51
  return INT2NUM((int)node->etype);
47
52
  }
@@ -52,12 +57,13 @@ static VALUE entity_type(VALUE self)
52
57
  *
53
58
  * Get the external identifier for PUBLIC
54
59
  */
55
- static VALUE external_id(VALUE self)
60
+ static VALUE
61
+ external_id(VALUE self)
56
62
  {
57
63
  xmlEntityPtr node;
58
- Data_Get_Struct(self, xmlEntity, node);
64
+ Noko_Node_Get_Struct(self, xmlEntity, node);
59
65
 
60
- if(!node->ExternalID) return Qnil;
66
+ if (!node->ExternalID) { return Qnil; }
61
67
 
62
68
  return NOKOGIRI_STR_NEW2(node->ExternalID);
63
69
  }
@@ -68,43 +74,39 @@ static VALUE external_id(VALUE self)
68
74
  *
69
75
  * Get the URI for a SYSTEM or PUBLIC Entity
70
76
  */
71
- static VALUE system_id(VALUE self)
77
+ static VALUE
78
+ system_id(VALUE self)
72
79
  {
73
80
  xmlEntityPtr node;
74
- Data_Get_Struct(self, xmlEntity, node);
81
+ Noko_Node_Get_Struct(self, xmlEntity, node);
75
82
 
76
- if(!node->SystemID) return Qnil;
83
+ if (!node->SystemID) { return Qnil; }
77
84
 
78
85
  return NOKOGIRI_STR_NEW2(node->SystemID);
79
86
  }
80
87
 
81
- VALUE cNokogiriXmlEntityDecl;
82
-
83
- void init_xml_entity_decl()
88
+ void
89
+ noko_init_xml_entity_decl(void)
84
90
  {
85
- VALUE nokogiri = rb_define_module("Nokogiri");
86
- VALUE xml = rb_define_module_under(nokogiri, "XML");
87
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
88
- VALUE klass = rb_define_class_under(xml, "EntityDecl", node);
89
-
90
- cNokogiriXmlEntityDecl = klass;
91
+ assert(cNokogiriXmlNode);
92
+ cNokogiriXmlEntityDecl = rb_define_class_under(mNokogiriXml, "EntityDecl", cNokogiriXmlNode);
91
93
 
92
- rb_define_method(klass, "original_content", original_content, 0);
93
- rb_define_method(klass, "content", get_content, 0);
94
- rb_define_method(klass, "entity_type", entity_type, 0);
95
- rb_define_method(klass, "external_id", external_id, 0);
96
- rb_define_method(klass, "system_id", system_id, 0);
94
+ rb_define_method(cNokogiriXmlEntityDecl, "original_content", original_content, 0);
95
+ rb_define_method(cNokogiriXmlEntityDecl, "content", get_content, 0);
96
+ rb_define_method(cNokogiriXmlEntityDecl, "entity_type", entity_type, 0);
97
+ rb_define_method(cNokogiriXmlEntityDecl, "external_id", external_id, 0);
98
+ rb_define_method(cNokogiriXmlEntityDecl, "system_id", system_id, 0);
97
99
 
98
100
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_GENERAL"),
99
- INT2NUM(XML_INTERNAL_GENERAL_ENTITY));
101
+ INT2NUM(XML_INTERNAL_GENERAL_ENTITY));
100
102
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("EXTERNAL_GENERAL_PARSED"),
101
- INT2NUM(XML_EXTERNAL_GENERAL_PARSED_ENTITY));
103
+ INT2NUM(XML_EXTERNAL_GENERAL_PARSED_ENTITY));
102
104
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("EXTERNAL_GENERAL_UNPARSED"),
103
- INT2NUM(XML_EXTERNAL_GENERAL_UNPARSED_ENTITY));
105
+ INT2NUM(XML_EXTERNAL_GENERAL_UNPARSED_ENTITY));
104
106
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_PARAMETER"),
105
- INT2NUM(XML_INTERNAL_PARAMETER_ENTITY));
107
+ INT2NUM(XML_INTERNAL_PARAMETER_ENTITY));
106
108
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("EXTERNAL_PARAMETER"),
107
- INT2NUM(XML_EXTERNAL_PARAMETER_ENTITY));
109
+ INT2NUM(XML_EXTERNAL_PARAMETER_ENTITY));
108
110
  rb_const_set(cNokogiriXmlEntityDecl, rb_intern("INTERNAL_PREDEFINED"),
109
- INT2NUM(XML_INTERNAL_PREDEFINED_ENTITY));
111
+ INT2NUM(XML_INTERNAL_PREDEFINED_ENTITY));
110
112
  }
@@ -1,4 +1,6 @@
1
- #include <xml_entity_reference.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlEntityReference;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,7 +8,8 @@
6
8
  *
7
9
  * Create a new EntityReference element on the +document+ with +name+
8
10
  */
9
- static VALUE new(int argc, VALUE *argv, VALUE klass)
11
+ static VALUE
12
+ new (int argc, VALUE *argv, VALUE klass)
10
13
  {
11
14
  xmlDocPtr xml_doc;
12
15
  xmlNodePtr node;
@@ -17,36 +20,31 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
17
20
 
18
21
  rb_scan_args(argc, argv, "2*", &document, &name, &rest);
19
22
 
20
- Data_Get_Struct(document, xmlDoc, xml_doc);
23
+ xml_doc = noko_xml_document_unwrap(document);
21
24
 
22
25
  node = xmlNewReference(
23
- xml_doc,
24
- (const xmlChar *)StringValueCStr(name)
25
- );
26
+ xml_doc,
27
+ (const xmlChar *)StringValueCStr(name)
28
+ );
26
29
 
27
- nokogiri_root_node(node);
30
+ noko_xml_document_pin_node(node);
28
31
 
29
- rb_node = Nokogiri_wrap_xml_node(klass, node);
32
+ rb_node = noko_xml_node_wrap(klass, node);
30
33
  rb_obj_call_init(rb_node, argc, argv);
31
34
 
32
- if(rb_block_given_p()) rb_yield(rb_node);
35
+ if (rb_block_given_p()) { rb_yield(rb_node); }
33
36
 
34
37
  return rb_node;
35
38
  }
36
39
 
37
- VALUE cNokogiriXmlEntityReference;
38
- void init_xml_entity_reference()
40
+ void
41
+ noko_init_xml_entity_reference(void)
39
42
  {
40
- VALUE nokogiri = rb_define_module("Nokogiri");
41
- VALUE xml = rb_define_module_under(nokogiri, "XML");
42
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
43
-
43
+ assert(cNokogiriXmlNode);
44
44
  /*
45
45
  * EntityReference represents an EntityReference node in an xml document.
46
46
  */
47
- VALUE klass = rb_define_class_under(xml, "EntityReference", node);
48
-
49
- cNokogiriXmlEntityReference = klass;
47
+ cNokogiriXmlEntityReference = rb_define_class_under(mNokogiriXml, "EntityReference", cNokogiriXmlNode);
50
48
 
51
- rb_define_singleton_method(klass, "new", new, -1);
49
+ rb_define_singleton_method(cNokogiriXmlEntityReference, "new", new, -1);
52
50
  }
@@ -1,111 +1,181 @@
1
- #include <xml_namespace.h>
1
+ #include <nokogiri.h>
2
+
3
+ /*
4
+ * The lifecycle of a Namespace node is more complicated than other Nodes, for two reasons:
5
+ *
6
+ * 1. the underlying C structure has a different layout than all the other node structs, with the
7
+ * `_private` member where we store a pointer to Ruby object data not being in first position.
8
+ * 2. xmlNs structures returned in an xmlNodeset from an XPath query are copies of the document's
9
+ * namespaces, and so do not share the same memory lifecycle as everything else in a document.
10
+ *
11
+ * As a result of 1, you may see special handling of XML_NAMESPACE_DECL node types throughout the
12
+ * Nokogiri C code, though I intend to wrap up that logic in ruby_object_{get,set} functions
13
+ * shortly.
14
+ *
15
+ * As a result of 2, you will see we have special handling in this file and in xml_node_set.c to
16
+ * carefully manage the memory lifecycle of xmlNs structs to match the Ruby object's GC
17
+ * lifecycle. In xml_node_set.c we have local versions of xmlXPathNodeSetDel() and
18
+ * xmlXPathFreeNodeSet() that avoid freeing xmlNs structs in the node set. In this file, we decide
19
+ * whether or not to call dealloc_namespace() depending on whether the xmlNs struct appears to be
20
+ * in an xmlNodeSet (and thus the result of an XPath query) or not.
21
+ *
22
+ * Yes, this is madness.
23
+ */
2
24
 
3
25
  VALUE cNokogiriXmlNamespace ;
4
26
 
5
- static void dealloc_namespace(xmlNsPtr ns)
27
+ static void
28
+ _xml_namespace_dealloc(void *ptr)
6
29
  {
7
30
  /*
8
- *
9
31
  * this deallocator is only used for namespace nodes that are part of an xpath
10
- * node set.
11
- *
12
- * see Nokogiri_wrap_xml_namespace() for more details.
13
- *
32
+ * node set. see noko_xml_namespace_wrap().
14
33
  */
15
- NOKOGIRI_DEBUG_START(ns) ;
34
+ xmlNsPtr ns = ptr;
35
+
16
36
  if (ns->href) {
17
- xmlFree((xmlChar *)(uintptr_t)ns->href);
37
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
18
38
  }
19
39
  if (ns->prefix) {
20
- xmlFree((xmlChar *)(uintptr_t)ns->prefix);
40
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
21
41
  }
22
42
  xmlFree(ns);
23
- NOKOGIRI_DEBUG_END(ns) ;
24
43
  }
25
44
 
45
+ static void
46
+ _xml_namespace_update_references(void *ptr)
47
+ {
48
+ xmlNsPtr ns = ptr;
49
+ if (ns->_private) {
50
+ ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
51
+ }
52
+ }
53
+
54
+ static const rb_data_type_t xml_ns_type_with_free = {
55
+ .wrap_struct_name = "xmlNs (with free)",
56
+ .function = {
57
+ .dfree = _xml_namespace_dealloc,
58
+ .dcompact = _xml_namespace_update_references,
59
+ },
60
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
61
+ };
62
+
63
+ static const rb_data_type_t xml_ns_type_without_free = {
64
+ .wrap_struct_name = "xmlNs (without free)",
65
+ .function = {
66
+ .dcompact = _xml_namespace_update_references,
67
+ },
68
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
69
+ };
26
70
 
27
71
  /*
28
- * call-seq:
29
- * prefix
72
+ * :call-seq:
73
+ * prefix() → String or nil
74
+ *
75
+ * Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace).
76
+ *
77
+ * *Example*
78
+ *
79
+ * doc = Nokogiri::XML.parse(<<~XML)
80
+ * <?xml version="1.0"?>
81
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
82
+ * <child1 foo="abc" noko:bar="def"/>
83
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
84
+ * </root>
85
+ * XML
30
86
  *
31
- * Get the prefix for this namespace. Returns +nil+ if there is no prefix.
87
+ * doc.root.elements.first.namespace.prefix
88
+ * # => nil
89
+ *
90
+ * doc.root.elements.last.namespace.prefix
91
+ * # => "noko"
32
92
  */
33
- static VALUE prefix(VALUE self)
93
+ static VALUE
94
+ prefix(VALUE self)
34
95
  {
35
96
  xmlNsPtr ns;
36
97
 
37
- Data_Get_Struct(self, xmlNs, ns);
38
- if(!ns->prefix) return Qnil;
98
+ Noko_Namespace_Get_Struct(self, xmlNs, ns);
99
+ if (!ns->prefix) { return Qnil; }
39
100
 
40
101
  return NOKOGIRI_STR_NEW2(ns->prefix);
41
102
  }
42
103
 
43
104
  /*
44
- * call-seq:
45
- * href
105
+ * :call-seq:
106
+ * href() → String
107
+ *
108
+ * Returns the URI reference for this Namespace.
109
+ *
110
+ * *Example*
111
+ *
112
+ * doc = Nokogiri::XML.parse(<<~XML)
113
+ * <?xml version="1.0"?>
114
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
115
+ * <child1 foo="abc" noko:bar="def"/>
116
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
117
+ * </root>
118
+ * XML
46
119
  *
47
- * Get the href for this namespace
120
+ * doc.root.elements.first.namespace.href
121
+ * # => "http://nokogiri.org/ns/default"
122
+ *
123
+ * doc.root.elements.last.namespace.href
124
+ * # => "http://nokogiri.org/ns/noko"
48
125
  */
49
- static VALUE href(VALUE self)
126
+ static VALUE
127
+ href(VALUE self)
50
128
  {
51
129
  xmlNsPtr ns;
52
130
 
53
- Data_Get_Struct(self, xmlNs, ns);
54
- if(!ns->href) return Qnil;
131
+ Noko_Namespace_Get_Struct(self, xmlNs, ns);
132
+ if (!ns->href) { return Qnil; }
55
133
 
56
134
  return NOKOGIRI_STR_NEW2(ns->href);
57
135
  }
58
136
 
59
- static int part_of_an_xpath_node_set_eh(xmlNsPtr node)
60
- {
61
- return (node->next && ! NOKOGIRI_NAMESPACE_EH(node->next));
62
- }
63
-
64
- VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node)
137
+ VALUE
138
+ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document)
65
139
  {
66
- VALUE ns = 0, document, node_cache;
140
+ VALUE rb_namespace;
67
141
 
68
- assert(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE);
69
-
70
- if (node->_private) return (VALUE)node->_private;
71
-
72
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
142
+ if (c_namespace->_private) {
143
+ return (VALUE)c_namespace->_private;
144
+ }
73
145
 
74
- if (DOC_RUBY_OBJECT_TEST(doc)) {
75
- document = DOC_RUBY_OBJECT(doc);
146
+ if (c_document) {
147
+ rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
148
+ &xml_ns_type_without_free,
149
+ c_namespace);
76
150
 
77
- if (part_of_an_xpath_node_set_eh(node)) {
78
- /*
79
- * this is a duplicate returned as part of an xpath query node set, and so
80
- * we need to make sure we manage this memory.
81
- *
82
- * see comments in xml_node_set.c for more details.
83
- */
84
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, node);
85
- } else {
86
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
87
- node_cache = rb_iv_get(document, "@node_cache");
88
- rb_ary_push(node_cache, ns);
151
+ if (DOC_RUBY_OBJECT_TEST(c_document)) {
152
+ rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document));
153
+ rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace);
89
154
  }
90
-
91
- rb_iv_set(ns, "@document", document);
92
155
  } else {
93
- ns = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, node);
156
+ rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
157
+ &xml_ns_type_with_free,
158
+ c_namespace);
94
159
  }
95
160
 
96
- node->_private = (void *)ns;
161
+ c_namespace->_private = (void *)rb_namespace;
97
162
 
98
- return ns;
163
+ return rb_namespace;
164
+ }
165
+
166
+ VALUE
167
+ noko_xml_namespace_wrap_xpath_copy(xmlNsPtr c_namespace)
168
+ {
169
+ return noko_xml_namespace_wrap(c_namespace, NULL);
99
170
  }
100
171
 
101
- void init_xml_namespace()
172
+ void
173
+ noko_init_xml_namespace(void)
102
174
  {
103
- VALUE nokogiri = rb_define_module("Nokogiri");
104
- VALUE xml = rb_define_module_under(nokogiri, "XML");
105
- VALUE klass = rb_define_class_under(xml, "Namespace", rb_cObject);
175
+ cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject);
106
176
 
107
- cNokogiriXmlNamespace = klass;
177
+ rb_undef_alloc_func(cNokogiriXmlNamespace);
108
178
 
109
- rb_define_method(klass, "prefix", prefix, 0);
110
- rb_define_method(klass, "href", href, 0);
179
+ rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0);
180
+ rb_define_method(cNokogiriXmlNamespace, "href", href, 0);
111
181
  }