nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,13 +1,30 @@
1
- #include <xml_reader.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void dealloc(xmlTextReaderPtr reader)
3
+ VALUE cNokogiriXmlReader;
4
+
5
+ static void
6
+ xml_reader_deallocate(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(reader);
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
10
+ xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
6
12
  xmlFreeTextReader(reader);
7
- NOKOGIRI_DEBUG_END(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
8
16
  }
9
17
 
10
- static int has_attributes(xmlTextReaderPtr reader)
18
+ static const rb_data_type_t xml_text_reader_type = {
19
+ .wrap_struct_name = "xmlTextReader",
20
+ .function = {
21
+ .dfree = xml_reader_deallocate,
22
+ },
23
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
24
+ };
25
+
26
+ static int
27
+ has_attributes(xmlTextReaderPtr reader)
11
28
  {
12
29
  /*
13
30
  * this implementation of xmlTextReaderHasAttributes explicitly includes
@@ -16,47 +33,40 @@ static int has_attributes(xmlTextReaderPtr reader)
16
33
  */
17
34
  xmlNodePtr node ;
18
35
  node = xmlTextReaderCurrentNode(reader);
19
- if (node == NULL)
20
- return(0);
36
+ if (node == NULL) {
37
+ return (0);
38
+ }
21
39
 
22
40
  if ((node->type == XML_ELEMENT_NODE) &&
23
- ((node->properties != NULL) || (node->nsDef != NULL)))
24
- return(1);
25
- return(0);
41
+ ((node->properties != NULL) || (node->nsDef != NULL))) {
42
+ return (1);
43
+ }
44
+ return (0);
26
45
  }
27
46
 
28
- static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
47
+ // TODO: merge this function into the `namespaces` method implementation
48
+ static void
49
+ Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
29
50
  {
30
51
  xmlNsPtr ns;
31
- static char buffer[XMLNS_BUFFER_LEN] ;
32
- char *key ;
33
- size_t keylen ;
52
+ VALUE key;
34
53
 
35
- if (node->type != XML_ELEMENT_NODE) return ;
54
+ if (node->type != XML_ELEMENT_NODE) { return ; }
36
55
 
37
56
  ns = node->nsDef;
38
57
  while (ns != NULL) {
39
58
 
40
- keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
41
- if (keylen > XMLNS_BUFFER_LEN) {
42
- key = (char*)malloc(keylen) ;
43
- } else {
44
- key = buffer ;
45
- }
46
-
59
+ key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
47
60
  if (ns->prefix) {
48
- sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
49
- } else {
50
- sprintf(key, "%s", XMLNS_PREFIX);
61
+ rb_str_cat_cstr(key, ":");
62
+ rb_str_cat_cstr(key, (const char *)ns->prefix);
51
63
  }
52
64
 
65
+ key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
53
66
  rb_hash_aset(attr_hash,
54
- NOKOGIRI_STR_NEW2(key),
55
- (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
56
- );
57
- if (key != buffer) {
58
- free(key);
59
- }
67
+ key,
68
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
69
+ );
60
70
  ns = ns->next ;
61
71
  }
62
72
  }
@@ -68,15 +78,16 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
68
78
  *
69
79
  * Was an attribute generated from the default value in the DTD or schema?
70
80
  */
71
- static VALUE default_eh(VALUE self)
81
+ static VALUE
82
+ default_eh(VALUE self)
72
83
  {
73
84
  xmlTextReaderPtr reader;
74
85
  int eh;
75
86
 
76
- Data_Get_Struct(self, xmlTextReader, reader);
87
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
77
88
  eh = xmlTextReaderIsDefault(reader);
78
- if(eh == 0) return Qfalse;
79
- if(eh == 1) return Qtrue;
89
+ if (eh == 0) { return Qfalse; }
90
+ if (eh == 1) { return Qtrue; }
80
91
 
81
92
  return Qnil;
82
93
  }
@@ -87,15 +98,16 @@ static VALUE default_eh(VALUE self)
87
98
  *
88
99
  * Does this node have a text value?
89
100
  */
90
- static VALUE value_eh(VALUE self)
101
+ static VALUE
102
+ value_eh(VALUE self)
91
103
  {
92
104
  xmlTextReaderPtr reader;
93
105
  int eh;
94
106
 
95
- Data_Get_Struct(self, xmlTextReader, reader);
107
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
96
108
  eh = xmlTextReaderHasValue(reader);
97
- if(eh == 0) return Qfalse;
98
- if(eh == 1) return Qtrue;
109
+ if (eh == 0) { return Qfalse; }
110
+ if (eh == 1) { return Qtrue; }
99
111
 
100
112
  return Qnil;
101
113
  }
@@ -106,15 +118,16 @@ static VALUE value_eh(VALUE self)
106
118
  *
107
119
  * Does this node have attributes?
108
120
  */
109
- static VALUE attributes_eh(VALUE self)
121
+ static VALUE
122
+ attributes_eh(VALUE self)
110
123
  {
111
124
  xmlTextReaderPtr reader;
112
125
  int eh;
113
126
 
114
- Data_Get_Struct(self, xmlTextReader, reader);
127
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
115
128
  eh = has_attributes(reader);
116
- if(eh == 0) return Qfalse;
117
- if(eh == 1) return Qtrue;
129
+ if (eh == 0) { return Qfalse; }
130
+ if (eh == 1) { return Qtrue; }
118
131
 
119
132
  return Qnil;
120
133
  }
@@ -125,52 +138,94 @@ static VALUE attributes_eh(VALUE self)
125
138
  *
126
139
  * Get a hash of namespaces for this Node
127
140
  */
128
- static VALUE namespaces(VALUE self)
141
+ static VALUE
142
+ rb_xml_reader_namespaces(VALUE rb_reader)
129
143
  {
130
- xmlTextReaderPtr reader;
131
- xmlNodePtr ptr;
132
- VALUE attr ;
144
+ VALUE rb_namespaces = rb_hash_new() ;
145
+ xmlTextReaderPtr c_reader;
146
+ xmlNodePtr c_node;
147
+ VALUE rb_errors;
133
148
 
134
- Data_Get_Struct(self, xmlTextReader, reader);
149
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
135
150
 
136
- attr = rb_hash_new() ;
151
+ if (! has_attributes(c_reader)) {
152
+ return rb_namespaces ;
153
+ }
137
154
 
138
- if (! has_attributes(reader))
139
- return attr ;
155
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
140
156
 
141
- ptr = xmlTextReaderExpand(reader);
142
- if(ptr == NULL) return Qnil;
157
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
158
+ c_node = xmlTextReaderExpand(c_reader);
159
+ xmlSetStructuredErrorFunc(NULL, NULL);
143
160
 
144
- Nokogiri_xml_node_namespaces(ptr, attr);
161
+ if (c_node == NULL) {
162
+ if (RARRAY_LEN(rb_errors) > 0) {
163
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
164
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
165
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
166
+ }
167
+ return Qnil;
168
+ }
169
+
170
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
145
171
 
146
- return attr ;
172
+ return rb_namespaces ;
147
173
  }
148
174
 
149
175
  /*
150
- * call-seq:
151
- * attribute_nodes
152
- *
153
- * Get a list of attributes for this Node
176
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
177
+
178
+ Get the attributes of the current node as a Hash of names and values.
179
+
180
+ See related: #attributes and #namespaces
154
181
  */
155
- static VALUE attribute_nodes(VALUE self)
182
+ static VALUE
183
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
156
184
  {
157
- xmlTextReaderPtr reader;
158
- xmlNodePtr ptr;
159
- VALUE attr ;
185
+ VALUE rb_attributes = rb_hash_new();
186
+ xmlTextReaderPtr c_reader;
187
+ xmlNodePtr c_node;
188
+ xmlAttrPtr c_property;
189
+ VALUE rb_errors;
160
190
 
161
- Data_Get_Struct(self, xmlTextReader, reader);
191
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
162
192
 
163
- attr = rb_ary_new() ;
193
+ if (!has_attributes(c_reader)) {
194
+ return rb_attributes;
195
+ }
164
196
 
165
- if (! has_attributes(reader))
166
- return attr ;
197
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
167
198
 
168
- ptr = xmlTextReaderExpand(reader);
169
- if(ptr == NULL) return Qnil;
199
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
200
+ c_node = xmlTextReaderExpand(c_reader);
201
+ xmlSetStructuredErrorFunc(NULL, NULL);
170
202
 
171
- Nokogiri_xml_node_properties(ptr, attr);
203
+ if (c_node == NULL) {
204
+ if (RARRAY_LEN(rb_errors) > 0) {
205
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
206
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
207
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
208
+ }
209
+ return Qnil;
210
+ }
211
+
212
+ c_property = c_node->properties;
213
+ while (c_property != NULL) {
214
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
215
+ VALUE rb_value = Qnil;
216
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
217
+
218
+ if (c_value) {
219
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
220
+ xmlFree(c_value);
221
+ }
172
222
 
173
- return attr ;
223
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
224
+
225
+ c_property = c_property->next;
226
+ }
227
+
228
+ return rb_attributes;
174
229
  }
175
230
 
176
231
  /*
@@ -179,22 +234,23 @@ static VALUE attribute_nodes(VALUE self)
179
234
  *
180
235
  * Get the value of attribute at +index+
181
236
  */
182
- static VALUE attribute_at(VALUE self, VALUE index)
237
+ static VALUE
238
+ attribute_at(VALUE self, VALUE index)
183
239
  {
184
240
  xmlTextReaderPtr reader;
185
241
  xmlChar *value;
186
242
  VALUE rb_value;
187
243
 
188
- Data_Get_Struct(self, xmlTextReader, reader);
244
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
189
245
 
190
- if(NIL_P(index)) return Qnil;
246
+ if (NIL_P(index)) { return Qnil; }
191
247
  index = rb_Integer(index);
192
248
 
193
249
  value = xmlTextReaderGetAttributeNo(
194
- reader,
195
- (int)NUM2INT(index)
196
- );
197
- if(value == NULL) return Qnil;
250
+ reader,
251
+ (int)NUM2INT(index)
252
+ );
253
+ if (value == NULL) { return Qnil; }
198
254
 
199
255
  rb_value = NOKOGIRI_STR_NEW2(value);
200
256
  xmlFree(value);
@@ -207,19 +263,20 @@ static VALUE attribute_at(VALUE self, VALUE index)
207
263
  *
208
264
  * Get the value of attribute named +name+
209
265
  */
210
- static VALUE reader_attribute(VALUE self, VALUE name)
266
+ static VALUE
267
+ reader_attribute(VALUE self, VALUE name)
211
268
  {
212
269
  xmlTextReaderPtr reader;
213
270
  xmlChar *value ;
214
271
  VALUE rb_value;
215
272
 
216
- Data_Get_Struct(self, xmlTextReader, reader);
273
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
217
274
 
218
- if(NIL_P(name)) return Qnil;
275
+ if (NIL_P(name)) { return Qnil; }
219
276
  name = StringValue(name) ;
220
277
 
221
- value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
222
- if(value == NULL) return Qnil;
278
+ value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
279
+ if (value == NULL) { return Qnil; }
223
280
 
224
281
  rb_value = NOKOGIRI_STR_NEW2(value);
225
282
  xmlFree(value);
@@ -232,16 +289,17 @@ static VALUE reader_attribute(VALUE self, VALUE name)
232
289
  *
233
290
  * Get the number of attributes for the current node
234
291
  */
235
- static VALUE attribute_count(VALUE self)
292
+ static VALUE
293
+ attribute_count(VALUE self)
236
294
  {
237
295
  xmlTextReaderPtr reader;
238
296
  int count;
239
297
 
240
- Data_Get_Struct(self, xmlTextReader, reader);
298
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
241
299
  count = xmlTextReaderAttributeCount(reader);
242
- if(count == -1) return Qnil;
300
+ if (count == -1) { return Qnil; }
243
301
 
244
- return INT2NUM((long)count);
302
+ return INT2NUM(count);
245
303
  }
246
304
 
247
305
  /*
@@ -250,16 +308,17 @@ static VALUE attribute_count(VALUE self)
250
308
  *
251
309
  * Get the depth of the node
252
310
  */
253
- static VALUE depth(VALUE self)
311
+ static VALUE
312
+ depth(VALUE self)
254
313
  {
255
314
  xmlTextReaderPtr reader;
256
315
  int depth;
257
316
 
258
- Data_Get_Struct(self, xmlTextReader, reader);
317
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
259
318
  depth = xmlTextReaderDepth(reader);
260
- if(depth == -1) return Qnil;
319
+ if (depth == -1) { return Qnil; }
261
320
 
262
- return INT2NUM((long)depth);
321
+ return INT2NUM(depth);
263
322
  }
264
323
 
265
324
  /*
@@ -268,14 +327,15 @@ static VALUE depth(VALUE self)
268
327
  *
269
328
  * Get the XML version of the document being read
270
329
  */
271
- static VALUE xml_version(VALUE self)
330
+ static VALUE
331
+ xml_version(VALUE self)
272
332
  {
273
333
  xmlTextReaderPtr reader;
274
334
  const char *version;
275
335
 
276
- Data_Get_Struct(self, xmlTextReader, reader);
336
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
277
337
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
278
- if(version == NULL) return Qnil;
338
+ if (version == NULL) { return Qnil; }
279
339
 
280
340
  return NOKOGIRI_STR_NEW2(version);
281
341
  }
@@ -286,14 +346,15 @@ static VALUE xml_version(VALUE self)
286
346
  *
287
347
  * Get the xml:lang scope within which the node resides.
288
348
  */
289
- static VALUE lang(VALUE self)
349
+ static VALUE
350
+ lang(VALUE self)
290
351
  {
291
352
  xmlTextReaderPtr reader;
292
353
  const char *lang;
293
354
 
294
- Data_Get_Struct(self, xmlTextReader, reader);
355
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
295
356
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
296
- if(lang == NULL) return Qnil;
357
+ if (lang == NULL) { return Qnil; }
297
358
 
298
359
  return NOKOGIRI_STR_NEW2(lang);
299
360
  }
@@ -304,14 +365,15 @@ static VALUE lang(VALUE self)
304
365
  *
305
366
  * Get the text value of the node if present. Returns a utf-8 encoded string.
306
367
  */
307
- static VALUE value(VALUE self)
368
+ static VALUE
369
+ value(VALUE self)
308
370
  {
309
371
  xmlTextReaderPtr reader;
310
372
  const char *value;
311
373
 
312
- Data_Get_Struct(self, xmlTextReader, reader);
374
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
313
375
  value = (const char *)xmlTextReaderConstValue(reader);
314
- if(value == NULL) return Qnil;
376
+ if (value == NULL) { return Qnil; }
315
377
 
316
378
  return NOKOGIRI_STR_NEW2(value);
317
379
  }
@@ -322,14 +384,15 @@ static VALUE value(VALUE self)
322
384
  *
323
385
  * Get the shorthand reference to the namespace associated with the node.
324
386
  */
325
- static VALUE prefix(VALUE self)
387
+ static VALUE
388
+ prefix(VALUE self)
326
389
  {
327
390
  xmlTextReaderPtr reader;
328
391
  const char *prefix;
329
392
 
330
- Data_Get_Struct(self, xmlTextReader, reader);
393
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
331
394
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
332
- if(prefix == NULL) return Qnil;
395
+ if (prefix == NULL) { return Qnil; }
333
396
 
334
397
  return NOKOGIRI_STR_NEW2(prefix);
335
398
  }
@@ -340,14 +403,15 @@ static VALUE prefix(VALUE self)
340
403
  *
341
404
  * Get the URI defining the namespace associated with the node
342
405
  */
343
- static VALUE namespace_uri(VALUE self)
406
+ static VALUE
407
+ namespace_uri(VALUE self)
344
408
  {
345
409
  xmlTextReaderPtr reader;
346
410
  const char *uri;
347
411
 
348
- Data_Get_Struct(self, xmlTextReader, reader);
412
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
349
413
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
350
- if(uri == NULL) return Qnil;
414
+ if (uri == NULL) { return Qnil; }
351
415
 
352
416
  return NOKOGIRI_STR_NEW2(uri);
353
417
  }
@@ -358,14 +422,15 @@ static VALUE namespace_uri(VALUE self)
358
422
  *
359
423
  * Get the local name of the node
360
424
  */
361
- static VALUE local_name(VALUE self)
425
+ static VALUE
426
+ local_name(VALUE self)
362
427
  {
363
428
  xmlTextReaderPtr reader;
364
429
  const char *name;
365
430
 
366
- Data_Get_Struct(self, xmlTextReader, reader);
431
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
367
432
  name = (const char *)xmlTextReaderConstLocalName(reader);
368
- if(name == NULL) return Qnil;
433
+ if (name == NULL) { return Qnil; }
369
434
 
370
435
  return NOKOGIRI_STR_NEW2(name);
371
436
  }
@@ -376,14 +441,15 @@ static VALUE local_name(VALUE self)
376
441
  *
377
442
  * Get the name of the node. Returns a utf-8 encoded string.
378
443
  */
379
- static VALUE name(VALUE self)
444
+ static VALUE
445
+ name(VALUE self)
380
446
  {
381
447
  xmlTextReaderPtr reader;
382
448
  const char *name;
383
449
 
384
- Data_Get_Struct(self, xmlTextReader, reader);
450
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
385
451
  name = (const char *)xmlTextReaderConstName(reader);
386
- if(name == NULL) return Qnil;
452
+ if (name == NULL) { return Qnil; }
387
453
 
388
454
  return NOKOGIRI_STR_NEW2(name);
389
455
  }
@@ -394,16 +460,24 @@ static VALUE name(VALUE self)
394
460
  *
395
461
  * Get the xml:base of the node
396
462
  */
397
- static VALUE base_uri(VALUE self)
463
+ static VALUE
464
+ rb_xml_reader_base_uri(VALUE rb_reader)
398
465
  {
399
- xmlTextReaderPtr reader;
400
- const char * base_uri;
466
+ VALUE rb_base_uri;
467
+ xmlTextReaderPtr c_reader;
468
+ xmlChar *c_base_uri;
469
+
470
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
471
+
472
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
473
+ if (c_base_uri == NULL) {
474
+ return Qnil;
475
+ }
401
476
 
402
- Data_Get_Struct(self, xmlTextReader, reader);
403
- base_uri = (const char *)xmlTextReaderBaseUri(reader);
404
- if (base_uri == NULL) return Qnil;
477
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
478
+ xmlFree(c_base_uri);
405
479
 
406
- return NOKOGIRI_STR_NEW2(base_uri);
480
+ return rb_base_uri;
407
481
  }
408
482
 
409
483
  /*
@@ -412,11 +486,12 @@ static VALUE base_uri(VALUE self)
412
486
  *
413
487
  * Get the state of the reader
414
488
  */
415
- static VALUE state(VALUE self)
489
+ static VALUE
490
+ state(VALUE self)
416
491
  {
417
492
  xmlTextReaderPtr reader;
418
- Data_Get_Struct(self, xmlTextReader, reader);
419
- return INT2NUM((long)xmlTextReaderReadState(reader));
493
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
494
+ return INT2NUM(xmlTextReaderReadState(reader));
420
495
  }
421
496
 
422
497
  /*
@@ -425,11 +500,12 @@ static VALUE state(VALUE self)
425
500
  *
426
501
  * Get the type of readers current node
427
502
  */
428
- static VALUE node_type(VALUE self)
503
+ static VALUE
504
+ node_type(VALUE self)
429
505
  {
430
506
  xmlTextReaderPtr reader;
431
- Data_Get_Struct(self, xmlTextReader, reader);
432
- return INT2NUM((long)xmlTextReaderNodeType(reader));
507
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
508
+ return INT2NUM(xmlTextReaderNodeType(reader));
433
509
  }
434
510
 
435
511
  /*
@@ -438,31 +514,42 @@ static VALUE node_type(VALUE self)
438
514
  *
439
515
  * Move the Reader forward through the XML document.
440
516
  */
441
- static VALUE read_more(VALUE self)
517
+ static VALUE
518
+ read_more(VALUE rb_reader)
442
519
  {
443
- xmlTextReaderPtr reader;
444
- xmlErrorPtr error;
445
- VALUE error_list;
446
- int ret;
520
+ xmlTextReaderPtr c_reader;
521
+ libxmlStructuredErrorHandlerState handler_state;
447
522
 
448
- Data_Get_Struct(self, xmlTextReader, reader);
523
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
449
524
 
450
- error_list = rb_funcall(self, rb_intern("errors"), 0);
525
+ VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
526
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
451
527
 
452
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
453
- ret = xmlTextReaderRead(reader);
454
- xmlSetStructuredErrorFunc(NULL, NULL);
528
+ int status = xmlTextReaderRead(c_reader);
455
529
 
456
- if(ret == 1) return self;
457
- if(ret == 0) return Qnil;
530
+ noko__structured_error_func_restore(&handler_state);
458
531
 
459
- error = xmlGetLastError();
460
- if(error)
461
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
462
- else
463
- rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
532
+ xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
533
+ if (c_document && c_document->encoding == NULL) {
534
+ VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
535
+ if (RTEST(constructor_encoding)) {
536
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
537
+ } else {
538
+ rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
539
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
540
+ }
541
+ }
464
542
 
465
- return Qnil;
543
+ if (status == 1) { return rb_reader; }
544
+ if (status == 0) { return Qnil; }
545
+
546
+ /* if we're here, there was an error */
547
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
548
+ if (RB_TEST(exception)) {
549
+ rb_exc_raise(exception);
550
+ } else {
551
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
552
+ }
466
553
  }
467
554
 
468
555
  /*
@@ -472,19 +559,20 @@ static VALUE read_more(VALUE self)
472
559
  * Read the contents of the current node, including child nodes and markup.
473
560
  * Returns a utf-8 encoded string.
474
561
  */
475
- static VALUE inner_xml(VALUE self)
562
+ static VALUE
563
+ inner_xml(VALUE self)
476
564
  {
477
565
  xmlTextReaderPtr reader;
478
- xmlChar* value;
566
+ xmlChar *value;
479
567
  VALUE str;
480
568
 
481
- Data_Get_Struct(self, xmlTextReader, reader);
569
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
482
570
 
483
571
  value = xmlTextReaderReadInnerXml(reader);
484
572
 
485
573
  str = Qnil;
486
- if(value) {
487
- str = NOKOGIRI_STR_NEW2((char*)value);
574
+ if (value) {
575
+ str = NOKOGIRI_STR_NEW2((char *)value);
488
576
  xmlFree(value);
489
577
  }
490
578
 
@@ -498,18 +586,19 @@ static VALUE inner_xml(VALUE self)
498
586
  * Read the current node and its contents, including child nodes and markup.
499
587
  * Returns a utf-8 encoded string.
500
588
  */
501
- static VALUE outer_xml(VALUE self)
589
+ static VALUE
590
+ outer_xml(VALUE self)
502
591
  {
503
592
  xmlTextReaderPtr reader;
504
593
  xmlChar *value;
505
594
  VALUE str = Qnil;
506
595
 
507
- Data_Get_Struct(self, xmlTextReader, reader);
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
508
597
 
509
598
  value = xmlTextReaderReadOuterXml(reader);
510
599
 
511
- if(value) {
512
- str = NOKOGIRI_STR_NEW2((char*)value);
600
+ if (value) {
601
+ str = NOKOGIRI_STR_NEW2((char *)value);
513
602
  xmlFree(value);
514
603
  }
515
604
  return str;
@@ -519,38 +608,41 @@ static VALUE outer_xml(VALUE self)
519
608
  * call-seq:
520
609
  * from_memory(string, url = nil, encoding = nil, options = 0)
521
610
  *
522
- * Create a new reader that parses +string+
611
+ * Create a new Reader to parse a String.
523
612
  */
524
- static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
613
+ static VALUE
614
+ from_memory(int argc, VALUE *argv, VALUE klass)
525
615
  {
616
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
617
+ * become private. */
526
618
  VALUE rb_buffer, rb_url, encoding, rb_options;
527
619
  xmlTextReaderPtr reader;
528
- const char * c_url = NULL;
529
- const char * c_encoding = NULL;
620
+ const char *c_url = NULL;
621
+ const char *c_encoding = NULL;
530
622
  int c_options = 0;
531
623
  VALUE rb_reader, args[3];
532
624
 
533
625
  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
534
626
 
535
- if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
536
- if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
537
- if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
538
- if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
627
+ if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
628
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
629
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
630
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
539
631
 
540
632
  reader = xmlReaderForMemory(
541
- StringValuePtr(rb_buffer),
542
- (int)RSTRING_LEN(rb_buffer),
543
- c_url,
544
- c_encoding,
545
- c_options
546
- );
547
-
548
- if(reader == NULL) {
633
+ StringValuePtr(rb_buffer),
634
+ (int)RSTRING_LEN(rb_buffer),
635
+ c_url,
636
+ c_encoding,
637
+ c_options
638
+ );
639
+
640
+ if (reader == NULL) {
549
641
  xmlFreeTextReader(reader);
550
642
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
551
643
  }
552
644
 
553
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
645
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
554
646
  args[0] = rb_buffer;
555
647
  args[1] = rb_url;
556
648
  args[2] = encoding;
@@ -563,39 +655,42 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
563
655
  * call-seq:
564
656
  * from_io(io, url = nil, encoding = nil, options = 0)
565
657
  *
566
- * Create a new reader that parses +io+
658
+ * Create a new Reader to parse an IO stream.
567
659
  */
568
- static VALUE from_io(int argc, VALUE *argv, VALUE klass)
660
+ static VALUE
661
+ from_io(int argc, VALUE *argv, VALUE klass)
569
662
  {
663
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
664
+ * become private. */
570
665
  VALUE rb_io, rb_url, encoding, rb_options;
571
666
  xmlTextReaderPtr reader;
572
- const char * c_url = NULL;
573
- const char * c_encoding = NULL;
667
+ const char *c_url = NULL;
668
+ const char *c_encoding = NULL;
574
669
  int c_options = 0;
575
670
  VALUE rb_reader, args[3];
576
671
 
577
672
  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
578
673
 
579
- if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
580
- if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
581
- if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
582
- if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);
674
+ if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
675
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
676
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
677
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
583
678
 
584
679
  reader = xmlReaderForIO(
585
- (xmlInputReadCallback)io_read_callback,
586
- (xmlInputCloseCallback)io_close_callback,
587
- (void *)rb_io,
588
- c_url,
589
- c_encoding,
590
- c_options
591
- );
592
-
593
- if(reader == NULL) {
680
+ (xmlInputReadCallback)noko_io_read,
681
+ (xmlInputCloseCallback)noko_io_close,
682
+ (void *)rb_io,
683
+ c_url,
684
+ c_encoding,
685
+ c_options
686
+ );
687
+
688
+ if (reader == NULL) {
594
689
  xmlFreeTextReader(reader);
595
690
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
596
691
  }
597
692
 
598
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
693
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
599
694
  args[0] = rb_io;
600
695
  args[1] = rb_url;
601
696
  args[2] = encoding;
@@ -610,59 +705,73 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
610
705
  *
611
706
  * Returns true if the current node is empty, otherwise false.
612
707
  */
613
- static VALUE empty_element_p(VALUE self)
708
+ static VALUE
709
+ empty_element_p(VALUE self)
614
710
  {
615
711
  xmlTextReaderPtr reader;
616
712
 
617
- Data_Get_Struct(self, xmlTextReader, reader);
713
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
618
714
 
619
- if(xmlTextReaderIsEmptyElement(reader))
715
+ if (xmlTextReaderIsEmptyElement(reader)) {
620
716
  return Qtrue;
717
+ }
621
718
 
622
719
  return Qfalse;
623
720
  }
624
721
 
625
- VALUE cNokogiriXmlReader;
626
-
627
- void init_xml_reader()
722
+ static VALUE
723
+ rb_xml_reader_encoding(VALUE rb_reader)
628
724
  {
629
- VALUE module = rb_define_module("Nokogiri");
630
- VALUE xml = rb_define_module_under(module, "XML");
725
+ xmlTextReaderPtr c_reader;
726
+ const char *parser_encoding;
727
+ VALUE constructor_encoding;
728
+
729
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
730
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
731
+ if (parser_encoding) {
732
+ return NOKOGIRI_STR_NEW2(parser_encoding);
733
+ }
631
734
 
632
- /*
633
- * The Reader parser allows you to effectively pull parse an XML document.
634
- * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
635
- * node. Note that you may only iterate over the document once!
636
- */
637
- VALUE klass = rb_define_class_under(xml, "Reader", rb_cObject);
638
-
639
- cNokogiriXmlReader = klass;
640
-
641
- rb_define_singleton_method(klass, "from_memory", from_memory, -1);
642
- rb_define_singleton_method(klass, "from_io", from_io, -1);
643
-
644
- rb_define_method(klass, "read", read_more, 0);
645
- rb_define_method(klass, "inner_xml", inner_xml, 0);
646
- rb_define_method(klass, "outer_xml", outer_xml, 0);
647
- rb_define_method(klass, "state", state, 0);
648
- rb_define_method(klass, "node_type", node_type, 0);
649
- rb_define_method(klass, "name", name, 0);
650
- rb_define_method(klass, "local_name", local_name, 0);
651
- rb_define_method(klass, "namespace_uri", namespace_uri, 0);
652
- rb_define_method(klass, "prefix", prefix, 0);
653
- rb_define_method(klass, "value", value, 0);
654
- rb_define_method(klass, "lang", lang, 0);
655
- rb_define_method(klass, "xml_version", xml_version, 0);
656
- rb_define_method(klass, "depth", depth, 0);
657
- rb_define_method(klass, "attribute_count", attribute_count, 0);
658
- rb_define_method(klass, "attribute", reader_attribute, 1);
659
- rb_define_method(klass, "namespaces", namespaces, 0);
660
- rb_define_method(klass, "attribute_at", attribute_at, 1);
661
- rb_define_method(klass, "empty_element?", empty_element_p, 0);
662
- rb_define_method(klass, "attributes?", attributes_eh, 0);
663
- rb_define_method(klass, "value?", value_eh, 0);
664
- rb_define_method(klass, "default?", default_eh, 0);
665
- rb_define_method(klass, "base_uri", base_uri, 0);
666
-
667
- rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
735
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
736
+ if (RTEST(constructor_encoding)) {
737
+ return constructor_encoding;
738
+ }
739
+
740
+ return Qnil;
741
+ }
742
+
743
+ void
744
+ noko_init_xml_reader(void)
745
+ {
746
+ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
747
+
748
+ rb_undef_alloc_func(cNokogiriXmlReader);
749
+
750
+ rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
751
+ rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
752
+
753
+ rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
754
+ rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
755
+ rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
756
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
757
+ rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
758
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
759
+ rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
760
+ rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
761
+ rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
762
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
763
+ rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
764
+ rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
765
+ rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
766
+ rb_define_method(cNokogiriXmlReader, "name", name, 0);
767
+ rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
768
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
769
+ rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
770
+ rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
771
+ rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
772
+ rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
773
+ rb_define_method(cNokogiriXmlReader, "state", state, 0);
774
+ rb_define_method(cNokogiriXmlReader, "value", value, 0);
775
+ rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
776
+ rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
668
777
  }