nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,794 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlReader;
4
+
5
+ static void
6
+ dealloc(xmlTextReaderPtr reader)
7
+ {
8
+ xmlFreeTextReader(reader);
9
+ }
10
+
11
+ static int
12
+ has_attributes(xmlTextReaderPtr reader)
13
+ {
14
+ /*
15
+ * this implementation of xmlTextReaderHasAttributes explicitly includes
16
+ * namespaces and properties, because some earlier versions ignore
17
+ * namespaces.
18
+ */
19
+ xmlNodePtr node ;
20
+ node = xmlTextReaderCurrentNode(reader);
21
+ if (node == NULL) {
22
+ return (0);
23
+ }
24
+
25
+ if ((node->type == XML_ELEMENT_NODE) &&
26
+ ((node->properties != NULL) || (node->nsDef != NULL))) {
27
+ return (1);
28
+ }
29
+ return (0);
30
+ }
31
+
32
+ // TODO: merge this function into the `namespaces` method implementation
33
+ static void
34
+ Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
35
+ {
36
+ xmlNsPtr ns;
37
+ VALUE key;
38
+
39
+ if (node->type != XML_ELEMENT_NODE) { return ; }
40
+
41
+ ns = node->nsDef;
42
+ while (ns != NULL) {
43
+
44
+ key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
45
+ if (ns->prefix) {
46
+ rb_str_cat_cstr(key, ":");
47
+ rb_str_cat_cstr(key, (const char *)ns->prefix);
48
+ }
49
+
50
+ key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
51
+ rb_hash_aset(attr_hash,
52
+ key,
53
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
54
+ );
55
+ ns = ns->next ;
56
+ }
57
+ }
58
+
59
+
60
+ /*
61
+ * call-seq:
62
+ * default?
63
+ *
64
+ * Was an attribute generated from the default value in the DTD or schema?
65
+ */
66
+ static VALUE
67
+ default_eh(VALUE self)
68
+ {
69
+ xmlTextReaderPtr reader;
70
+ int eh;
71
+
72
+ Data_Get_Struct(self, xmlTextReader, reader);
73
+ eh = xmlTextReaderIsDefault(reader);
74
+ if (eh == 0) { return Qfalse; }
75
+ if (eh == 1) { return Qtrue; }
76
+
77
+ return Qnil;
78
+ }
79
+
80
+ /*
81
+ * call-seq:
82
+ * value?
83
+ *
84
+ * Does this node have a text value?
85
+ */
86
+ static VALUE
87
+ value_eh(VALUE self)
88
+ {
89
+ xmlTextReaderPtr reader;
90
+ int eh;
91
+
92
+ Data_Get_Struct(self, xmlTextReader, reader);
93
+ eh = xmlTextReaderHasValue(reader);
94
+ if (eh == 0) { return Qfalse; }
95
+ if (eh == 1) { return Qtrue; }
96
+
97
+ return Qnil;
98
+ }
99
+
100
+ /*
101
+ * call-seq:
102
+ * attributes?
103
+ *
104
+ * Does this node have attributes?
105
+ */
106
+ static VALUE
107
+ attributes_eh(VALUE self)
108
+ {
109
+ xmlTextReaderPtr reader;
110
+ int eh;
111
+
112
+ Data_Get_Struct(self, xmlTextReader, reader);
113
+ eh = has_attributes(reader);
114
+ if (eh == 0) { return Qfalse; }
115
+ if (eh == 1) { return Qtrue; }
116
+
117
+ return Qnil;
118
+ }
119
+
120
+ /*
121
+ * call-seq:
122
+ * namespaces
123
+ *
124
+ * Get a hash of namespaces for this Node
125
+ */
126
+ static VALUE
127
+ rb_xml_reader_namespaces(VALUE rb_reader)
128
+ {
129
+ VALUE rb_namespaces = rb_hash_new() ;
130
+ xmlTextReaderPtr c_reader;
131
+ xmlNodePtr c_node;
132
+ VALUE rb_errors;
133
+
134
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
135
+
136
+ if (! has_attributes(c_reader)) {
137
+ return rb_namespaces ;
138
+ }
139
+
140
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
141
+
142
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
143
+ c_node = xmlTextReaderExpand(c_reader);
144
+ xmlSetStructuredErrorFunc(NULL, NULL);
145
+
146
+ if (c_node == NULL) {
147
+ if (RARRAY_LEN(rb_errors) > 0) {
148
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
149
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
150
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
151
+ }
152
+ return Qnil;
153
+ }
154
+
155
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
156
+
157
+ return rb_namespaces ;
158
+ }
159
+
160
+ /*
161
+ :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
162
+
163
+ Get the attributes of the current node as an Array of XML:Attr
164
+
165
+ ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
166
+
167
+ See related: #attribute_hash, #attributes
168
+ */
169
+ static VALUE
170
+ rb_xml_reader_attribute_nodes(VALUE rb_reader)
171
+ {
172
+ xmlTextReaderPtr c_reader;
173
+ xmlNodePtr c_node;
174
+ VALUE attr_nodes;
175
+ int j;
176
+
177
+ // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
178
+ // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
179
+ NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
180
+
181
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
182
+
183
+ if (! has_attributes(c_reader)) {
184
+ return rb_ary_new() ;
185
+ }
186
+
187
+ c_node = xmlTextReaderExpand(c_reader);
188
+ if (c_node == NULL) {
189
+ return Qnil;
190
+ }
191
+
192
+ attr_nodes = noko_xml_node_attrs(c_node);
193
+
194
+ /* ensure that the Reader won't be GCed as long as a node is referenced */
195
+ for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
196
+ rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
197
+ }
198
+
199
+ return attr_nodes;
200
+ }
201
+
202
+ /*
203
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
204
+
205
+ Get the attributes of the current node as a Hash of names and values.
206
+
207
+ See related: #attributes and #namespaces
208
+ */
209
+ static VALUE
210
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
211
+ {
212
+ VALUE rb_attributes = rb_hash_new();
213
+ xmlTextReaderPtr c_reader;
214
+ xmlNodePtr c_node;
215
+ xmlAttrPtr c_property;
216
+ VALUE rb_errors;
217
+
218
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
219
+
220
+ if (!has_attributes(c_reader)) {
221
+ return rb_attributes;
222
+ }
223
+
224
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
225
+
226
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
227
+ c_node = xmlTextReaderExpand(c_reader);
228
+ xmlSetStructuredErrorFunc(NULL, NULL);
229
+
230
+ if (c_node == NULL) {
231
+ if (RARRAY_LEN(rb_errors) > 0) {
232
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
233
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
234
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
235
+ }
236
+ return Qnil;
237
+ }
238
+
239
+ c_property = c_node->properties;
240
+ while (c_property != NULL) {
241
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
242
+ VALUE rb_value = Qnil;
243
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
244
+
245
+ if (c_value) {
246
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
247
+ xmlFree(c_value);
248
+ }
249
+
250
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
251
+
252
+ c_property = c_property->next;
253
+ }
254
+
255
+ return rb_attributes;
256
+ }
257
+
258
+ /*
259
+ * call-seq:
260
+ * attribute_at(index)
261
+ *
262
+ * Get the value of attribute at +index+
263
+ */
264
+ static VALUE
265
+ attribute_at(VALUE self, VALUE index)
266
+ {
267
+ xmlTextReaderPtr reader;
268
+ xmlChar *value;
269
+ VALUE rb_value;
270
+
271
+ Data_Get_Struct(self, xmlTextReader, reader);
272
+
273
+ if (NIL_P(index)) { return Qnil; }
274
+ index = rb_Integer(index);
275
+
276
+ value = xmlTextReaderGetAttributeNo(
277
+ reader,
278
+ (int)NUM2INT(index)
279
+ );
280
+ if (value == NULL) { return Qnil; }
281
+
282
+ rb_value = NOKOGIRI_STR_NEW2(value);
283
+ xmlFree(value);
284
+ return rb_value;
285
+ }
286
+
287
+ /*
288
+ * call-seq:
289
+ * attribute(name)
290
+ *
291
+ * Get the value of attribute named +name+
292
+ */
293
+ static VALUE
294
+ reader_attribute(VALUE self, VALUE name)
295
+ {
296
+ xmlTextReaderPtr reader;
297
+ xmlChar *value ;
298
+ VALUE rb_value;
299
+
300
+ Data_Get_Struct(self, xmlTextReader, reader);
301
+
302
+ if (NIL_P(name)) { return Qnil; }
303
+ name = StringValue(name) ;
304
+
305
+ value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
306
+ if (value == NULL) { return Qnil; }
307
+
308
+ rb_value = NOKOGIRI_STR_NEW2(value);
309
+ xmlFree(value);
310
+ return rb_value;
311
+ }
312
+
313
+ /*
314
+ * call-seq:
315
+ * attribute_count
316
+ *
317
+ * Get the number of attributes for the current node
318
+ */
319
+ static VALUE
320
+ attribute_count(VALUE self)
321
+ {
322
+ xmlTextReaderPtr reader;
323
+ int count;
324
+
325
+ Data_Get_Struct(self, xmlTextReader, reader);
326
+ count = xmlTextReaderAttributeCount(reader);
327
+ if (count == -1) { return Qnil; }
328
+
329
+ return INT2NUM(count);
330
+ }
331
+
332
+ /*
333
+ * call-seq:
334
+ * depth
335
+ *
336
+ * Get the depth of the node
337
+ */
338
+ static VALUE
339
+ depth(VALUE self)
340
+ {
341
+ xmlTextReaderPtr reader;
342
+ int depth;
343
+
344
+ Data_Get_Struct(self, xmlTextReader, reader);
345
+ depth = xmlTextReaderDepth(reader);
346
+ if (depth == -1) { return Qnil; }
347
+
348
+ return INT2NUM(depth);
349
+ }
350
+
351
+ /*
352
+ * call-seq:
353
+ * xml_version
354
+ *
355
+ * Get the XML version of the document being read
356
+ */
357
+ static VALUE
358
+ xml_version(VALUE self)
359
+ {
360
+ xmlTextReaderPtr reader;
361
+ const char *version;
362
+
363
+ Data_Get_Struct(self, xmlTextReader, reader);
364
+ version = (const char *)xmlTextReaderConstXmlVersion(reader);
365
+ if (version == NULL) { return Qnil; }
366
+
367
+ return NOKOGIRI_STR_NEW2(version);
368
+ }
369
+
370
+ /*
371
+ * call-seq:
372
+ * lang
373
+ *
374
+ * Get the xml:lang scope within which the node resides.
375
+ */
376
+ static VALUE
377
+ lang(VALUE self)
378
+ {
379
+ xmlTextReaderPtr reader;
380
+ const char *lang;
381
+
382
+ Data_Get_Struct(self, xmlTextReader, reader);
383
+ lang = (const char *)xmlTextReaderConstXmlLang(reader);
384
+ if (lang == NULL) { return Qnil; }
385
+
386
+ return NOKOGIRI_STR_NEW2(lang);
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * value
392
+ *
393
+ * Get the text value of the node if present. Returns a utf-8 encoded string.
394
+ */
395
+ static VALUE
396
+ value(VALUE self)
397
+ {
398
+ xmlTextReaderPtr reader;
399
+ const char *value;
400
+
401
+ Data_Get_Struct(self, xmlTextReader, reader);
402
+ value = (const char *)xmlTextReaderConstValue(reader);
403
+ if (value == NULL) { return Qnil; }
404
+
405
+ return NOKOGIRI_STR_NEW2(value);
406
+ }
407
+
408
+ /*
409
+ * call-seq:
410
+ * prefix
411
+ *
412
+ * Get the shorthand reference to the namespace associated with the node.
413
+ */
414
+ static VALUE
415
+ prefix(VALUE self)
416
+ {
417
+ xmlTextReaderPtr reader;
418
+ const char *prefix;
419
+
420
+ Data_Get_Struct(self, xmlTextReader, reader);
421
+ prefix = (const char *)xmlTextReaderConstPrefix(reader);
422
+ if (prefix == NULL) { return Qnil; }
423
+
424
+ return NOKOGIRI_STR_NEW2(prefix);
425
+ }
426
+
427
+ /*
428
+ * call-seq:
429
+ * namespace_uri
430
+ *
431
+ * Get the URI defining the namespace associated with the node
432
+ */
433
+ static VALUE
434
+ namespace_uri(VALUE self)
435
+ {
436
+ xmlTextReaderPtr reader;
437
+ const char *uri;
438
+
439
+ Data_Get_Struct(self, xmlTextReader, reader);
440
+ uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
441
+ if (uri == NULL) { return Qnil; }
442
+
443
+ return NOKOGIRI_STR_NEW2(uri);
444
+ }
445
+
446
+ /*
447
+ * call-seq:
448
+ * local_name
449
+ *
450
+ * Get the local name of the node
451
+ */
452
+ static VALUE
453
+ local_name(VALUE self)
454
+ {
455
+ xmlTextReaderPtr reader;
456
+ const char *name;
457
+
458
+ Data_Get_Struct(self, xmlTextReader, reader);
459
+ name = (const char *)xmlTextReaderConstLocalName(reader);
460
+ if (name == NULL) { return Qnil; }
461
+
462
+ return NOKOGIRI_STR_NEW2(name);
463
+ }
464
+
465
+ /*
466
+ * call-seq:
467
+ * name
468
+ *
469
+ * Get the name of the node. Returns a utf-8 encoded string.
470
+ */
471
+ static VALUE
472
+ name(VALUE self)
473
+ {
474
+ xmlTextReaderPtr reader;
475
+ const char *name;
476
+
477
+ Data_Get_Struct(self, xmlTextReader, reader);
478
+ name = (const char *)xmlTextReaderConstName(reader);
479
+ if (name == NULL) { return Qnil; }
480
+
481
+ return NOKOGIRI_STR_NEW2(name);
482
+ }
483
+
484
+ /*
485
+ * call-seq:
486
+ * base_uri
487
+ *
488
+ * Get the xml:base of the node
489
+ */
490
+ static VALUE
491
+ rb_xml_reader_base_uri(VALUE rb_reader)
492
+ {
493
+ VALUE rb_base_uri;
494
+ xmlTextReaderPtr c_reader;
495
+ xmlChar *c_base_uri;
496
+
497
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
498
+
499
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
500
+ if (c_base_uri == NULL) {
501
+ return Qnil;
502
+ }
503
+
504
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
505
+ xmlFree(c_base_uri);
506
+
507
+ return rb_base_uri;
508
+ }
509
+
510
+ /*
511
+ * call-seq:
512
+ * state
513
+ *
514
+ * Get the state of the reader
515
+ */
516
+ static VALUE
517
+ state(VALUE self)
518
+ {
519
+ xmlTextReaderPtr reader;
520
+ Data_Get_Struct(self, xmlTextReader, reader);
521
+ return INT2NUM(xmlTextReaderReadState(reader));
522
+ }
523
+
524
+ /*
525
+ * call-seq:
526
+ * node_type
527
+ *
528
+ * Get the type of readers current node
529
+ */
530
+ static VALUE
531
+ node_type(VALUE self)
532
+ {
533
+ xmlTextReaderPtr reader;
534
+ Data_Get_Struct(self, xmlTextReader, reader);
535
+ return INT2NUM(xmlTextReaderNodeType(reader));
536
+ }
537
+
538
+ /*
539
+ * call-seq:
540
+ * read
541
+ *
542
+ * Move the Reader forward through the XML document.
543
+ */
544
+ static VALUE
545
+ read_more(VALUE self)
546
+ {
547
+ xmlTextReaderPtr reader;
548
+ xmlErrorPtr error;
549
+ VALUE error_list;
550
+ int ret;
551
+
552
+ Data_Get_Struct(self, xmlTextReader, reader);
553
+
554
+ error_list = rb_funcall(self, rb_intern("errors"), 0);
555
+
556
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
557
+ ret = xmlTextReaderRead(reader);
558
+ xmlSetStructuredErrorFunc(NULL, NULL);
559
+
560
+ if (ret == 1) { return self; }
561
+ if (ret == 0) { return Qnil; }
562
+
563
+ error = xmlGetLastError();
564
+ if (error) {
565
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
566
+ } else {
567
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
568
+ }
569
+
570
+ return Qnil;
571
+ }
572
+
573
+ /*
574
+ * call-seq:
575
+ * inner_xml
576
+ *
577
+ * Read the contents of the current node, including child nodes and markup.
578
+ * Returns a utf-8 encoded string.
579
+ */
580
+ static VALUE
581
+ inner_xml(VALUE self)
582
+ {
583
+ xmlTextReaderPtr reader;
584
+ xmlChar *value;
585
+ VALUE str;
586
+
587
+ Data_Get_Struct(self, xmlTextReader, reader);
588
+
589
+ value = xmlTextReaderReadInnerXml(reader);
590
+
591
+ str = Qnil;
592
+ if (value) {
593
+ str = NOKOGIRI_STR_NEW2((char *)value);
594
+ xmlFree(value);
595
+ }
596
+
597
+ return str;
598
+ }
599
+
600
+ /*
601
+ * call-seq:
602
+ * outer_xml
603
+ *
604
+ * Read the current node and its contents, including child nodes and markup.
605
+ * Returns a utf-8 encoded string.
606
+ */
607
+ static VALUE
608
+ outer_xml(VALUE self)
609
+ {
610
+ xmlTextReaderPtr reader;
611
+ xmlChar *value;
612
+ VALUE str = Qnil;
613
+
614
+ Data_Get_Struct(self, xmlTextReader, reader);
615
+
616
+ value = xmlTextReaderReadOuterXml(reader);
617
+
618
+ if (value) {
619
+ str = NOKOGIRI_STR_NEW2((char *)value);
620
+ xmlFree(value);
621
+ }
622
+ return str;
623
+ }
624
+
625
+ /*
626
+ * call-seq:
627
+ * from_memory(string, url = nil, encoding = nil, options = 0)
628
+ *
629
+ * Create a new reader that parses +string+
630
+ */
631
+ static VALUE
632
+ from_memory(int argc, VALUE *argv, VALUE klass)
633
+ {
634
+ VALUE rb_buffer, rb_url, encoding, rb_options;
635
+ xmlTextReaderPtr reader;
636
+ const char *c_url = NULL;
637
+ const char *c_encoding = NULL;
638
+ int c_options = 0;
639
+ VALUE rb_reader, args[3];
640
+
641
+ rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
642
+
643
+ if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
644
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
645
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
646
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
647
+
648
+ reader = xmlReaderForMemory(
649
+ StringValuePtr(rb_buffer),
650
+ (int)RSTRING_LEN(rb_buffer),
651
+ c_url,
652
+ c_encoding,
653
+ c_options
654
+ );
655
+
656
+ if (reader == NULL) {
657
+ xmlFreeTextReader(reader);
658
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
659
+ }
660
+
661
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
662
+ args[0] = rb_buffer;
663
+ args[1] = rb_url;
664
+ args[2] = encoding;
665
+ rb_obj_call_init(rb_reader, 3, args);
666
+
667
+ return rb_reader;
668
+ }
669
+
670
+ /*
671
+ * call-seq:
672
+ * from_io(io, url = nil, encoding = nil, options = 0)
673
+ *
674
+ * Create a new reader that parses +io+
675
+ */
676
+ static VALUE
677
+ from_io(int argc, VALUE *argv, VALUE klass)
678
+ {
679
+ VALUE rb_io, rb_url, encoding, rb_options;
680
+ xmlTextReaderPtr reader;
681
+ const char *c_url = NULL;
682
+ const char *c_encoding = NULL;
683
+ int c_options = 0;
684
+ VALUE rb_reader, args[3];
685
+
686
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
687
+
688
+ if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
689
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
690
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
691
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
692
+
693
+ reader = xmlReaderForIO(
694
+ (xmlInputReadCallback)noko_io_read,
695
+ (xmlInputCloseCallback)noko_io_close,
696
+ (void *)rb_io,
697
+ c_url,
698
+ c_encoding,
699
+ c_options
700
+ );
701
+
702
+ if (reader == NULL) {
703
+ xmlFreeTextReader(reader);
704
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
705
+ }
706
+
707
+ rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
708
+ args[0] = rb_io;
709
+ args[1] = rb_url;
710
+ args[2] = encoding;
711
+ rb_obj_call_init(rb_reader, 3, args);
712
+
713
+ return rb_reader;
714
+ }
715
+
716
+ /*
717
+ * call-seq:
718
+ * reader.empty_element? # => true or false
719
+ *
720
+ * Returns true if the current node is empty, otherwise false.
721
+ */
722
+ static VALUE
723
+ empty_element_p(VALUE self)
724
+ {
725
+ xmlTextReaderPtr reader;
726
+
727
+ Data_Get_Struct(self, xmlTextReader, reader);
728
+
729
+ if (xmlTextReaderIsEmptyElement(reader)) {
730
+ return Qtrue;
731
+ }
732
+
733
+ return Qfalse;
734
+ }
735
+
736
+ static VALUE
737
+ rb_xml_reader_encoding(VALUE rb_reader)
738
+ {
739
+ xmlTextReaderPtr c_reader;
740
+ const char *parser_encoding;
741
+ VALUE constructor_encoding;
742
+
743
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
744
+ if (RTEST(constructor_encoding)) {
745
+ return constructor_encoding;
746
+ }
747
+
748
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
749
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
750
+ if (parser_encoding == NULL) { return Qnil; }
751
+ return NOKOGIRI_STR_NEW2(parser_encoding);
752
+ }
753
+
754
+ void
755
+ noko_init_xml_reader()
756
+ {
757
+ /*
758
+ * The Reader parser allows you to effectively pull parse an XML document.
759
+ * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
760
+ * node. Note that you may only iterate over the document once!
761
+ */
762
+ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
763
+
764
+ rb_undef_alloc_func(cNokogiriXmlReader);
765
+
766
+ rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
767
+ rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
768
+
769
+ rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
770
+ rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
771
+ rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
772
+ rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
773
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
774
+ rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
775
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
776
+ rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
777
+ rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
778
+ rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
779
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
780
+ rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
781
+ rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
782
+ rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
783
+ rb_define_method(cNokogiriXmlReader, "name", name, 0);
784
+ rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
785
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
786
+ rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
787
+ rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
788
+ rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
789
+ rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
790
+ rb_define_method(cNokogiriXmlReader, "state", state, 0);
791
+ rb_define_method(cNokogiriXmlReader, "value", value, 0);
792
+ rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
793
+ rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
794
+ }