nokogiri 1.18.0.rc1-x86_64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,777 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlReader;
4
+
5
+ static void
6
+ xml_reader_deallocate(void *data)
7
+ {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
10
+ xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
12
+ xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
16
+ }
17
+
18
+ static const rb_data_type_t xml_text_reader_type = {
19
+ .wrap_struct_name = "xmlTextReader",
20
+ .function = {
21
+ .dfree = xml_reader_deallocate,
22
+ },
23
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
24
+ };
25
+
26
+ static int
27
+ has_attributes(xmlTextReaderPtr reader)
28
+ {
29
+ /*
30
+ * this implementation of xmlTextReaderHasAttributes explicitly includes
31
+ * namespaces and properties, because some earlier versions ignore
32
+ * namespaces.
33
+ */
34
+ xmlNodePtr node ;
35
+ node = xmlTextReaderCurrentNode(reader);
36
+ if (node == NULL) {
37
+ return (0);
38
+ }
39
+
40
+ if ((node->type == XML_ELEMENT_NODE) &&
41
+ ((node->properties != NULL) || (node->nsDef != NULL))) {
42
+ return (1);
43
+ }
44
+ return (0);
45
+ }
46
+
47
+ // TODO: merge this function into the `namespaces` method implementation
48
+ static void
49
+ Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
50
+ {
51
+ xmlNsPtr ns;
52
+ VALUE key;
53
+
54
+ if (node->type != XML_ELEMENT_NODE) { return ; }
55
+
56
+ ns = node->nsDef;
57
+ while (ns != NULL) {
58
+
59
+ key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
60
+ if (ns->prefix) {
61
+ rb_str_cat_cstr(key, ":");
62
+ rb_str_cat_cstr(key, (const char *)ns->prefix);
63
+ }
64
+
65
+ key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
66
+ rb_hash_aset(attr_hash,
67
+ key,
68
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
69
+ );
70
+ ns = ns->next ;
71
+ }
72
+ }
73
+
74
+
75
+ /*
76
+ * call-seq:
77
+ * default?
78
+ *
79
+ * Was an attribute generated from the default value in the DTD or schema?
80
+ */
81
+ static VALUE
82
+ default_eh(VALUE self)
83
+ {
84
+ xmlTextReaderPtr reader;
85
+ int eh;
86
+
87
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
88
+ eh = xmlTextReaderIsDefault(reader);
89
+ if (eh == 0) { return Qfalse; }
90
+ if (eh == 1) { return Qtrue; }
91
+
92
+ return Qnil;
93
+ }
94
+
95
+ /*
96
+ * call-seq:
97
+ * value?
98
+ *
99
+ * Does this node have a text value?
100
+ */
101
+ static VALUE
102
+ value_eh(VALUE self)
103
+ {
104
+ xmlTextReaderPtr reader;
105
+ int eh;
106
+
107
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
108
+ eh = xmlTextReaderHasValue(reader);
109
+ if (eh == 0) { return Qfalse; }
110
+ if (eh == 1) { return Qtrue; }
111
+
112
+ return Qnil;
113
+ }
114
+
115
+ /*
116
+ * call-seq:
117
+ * attributes?
118
+ *
119
+ * Does this node have attributes?
120
+ */
121
+ static VALUE
122
+ attributes_eh(VALUE self)
123
+ {
124
+ xmlTextReaderPtr reader;
125
+ int eh;
126
+
127
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
128
+ eh = has_attributes(reader);
129
+ if (eh == 0) { return Qfalse; }
130
+ if (eh == 1) { return Qtrue; }
131
+
132
+ return Qnil;
133
+ }
134
+
135
+ /*
136
+ * call-seq:
137
+ * namespaces
138
+ *
139
+ * Get a hash of namespaces for this Node
140
+ */
141
+ static VALUE
142
+ rb_xml_reader_namespaces(VALUE rb_reader)
143
+ {
144
+ VALUE rb_namespaces = rb_hash_new() ;
145
+ xmlTextReaderPtr c_reader;
146
+ xmlNodePtr c_node;
147
+ VALUE rb_errors;
148
+
149
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
150
+
151
+ if (! has_attributes(c_reader)) {
152
+ return rb_namespaces ;
153
+ }
154
+
155
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
156
+
157
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
158
+ c_node = xmlTextReaderExpand(c_reader);
159
+ xmlSetStructuredErrorFunc(NULL, NULL);
160
+
161
+ if (c_node == NULL) {
162
+ if (RARRAY_LEN(rb_errors) > 0) {
163
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
164
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
165
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
166
+ }
167
+ return Qnil;
168
+ }
169
+
170
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
171
+
172
+ return rb_namespaces ;
173
+ }
174
+
175
+ /*
176
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
177
+
178
+ Get the attributes of the current node as a Hash of names and values.
179
+
180
+ See related: #attributes and #namespaces
181
+ */
182
+ static VALUE
183
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
184
+ {
185
+ VALUE rb_attributes = rb_hash_new();
186
+ xmlTextReaderPtr c_reader;
187
+ xmlNodePtr c_node;
188
+ xmlAttrPtr c_property;
189
+ VALUE rb_errors;
190
+
191
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
192
+
193
+ if (!has_attributes(c_reader)) {
194
+ return rb_attributes;
195
+ }
196
+
197
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
198
+
199
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
200
+ c_node = xmlTextReaderExpand(c_reader);
201
+ xmlSetStructuredErrorFunc(NULL, NULL);
202
+
203
+ if (c_node == NULL) {
204
+ if (RARRAY_LEN(rb_errors) > 0) {
205
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
206
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
207
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
208
+ }
209
+ return Qnil;
210
+ }
211
+
212
+ c_property = c_node->properties;
213
+ while (c_property != NULL) {
214
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
215
+ VALUE rb_value = Qnil;
216
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
217
+
218
+ if (c_value) {
219
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
220
+ xmlFree(c_value);
221
+ }
222
+
223
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
224
+
225
+ c_property = c_property->next;
226
+ }
227
+
228
+ return rb_attributes;
229
+ }
230
+
231
+ /*
232
+ * call-seq:
233
+ * attribute_at(index)
234
+ *
235
+ * Get the value of attribute at +index+
236
+ */
237
+ static VALUE
238
+ attribute_at(VALUE self, VALUE index)
239
+ {
240
+ xmlTextReaderPtr reader;
241
+ xmlChar *value;
242
+ VALUE rb_value;
243
+
244
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
245
+
246
+ if (NIL_P(index)) { return Qnil; }
247
+ index = rb_Integer(index);
248
+
249
+ value = xmlTextReaderGetAttributeNo(
250
+ reader,
251
+ (int)NUM2INT(index)
252
+ );
253
+ if (value == NULL) { return Qnil; }
254
+
255
+ rb_value = NOKOGIRI_STR_NEW2(value);
256
+ xmlFree(value);
257
+ return rb_value;
258
+ }
259
+
260
+ /*
261
+ * call-seq:
262
+ * attribute(name)
263
+ *
264
+ * Get the value of attribute named +name+
265
+ */
266
+ static VALUE
267
+ reader_attribute(VALUE self, VALUE name)
268
+ {
269
+ xmlTextReaderPtr reader;
270
+ xmlChar *value ;
271
+ VALUE rb_value;
272
+
273
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
274
+
275
+ if (NIL_P(name)) { return Qnil; }
276
+ name = StringValue(name) ;
277
+
278
+ value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name));
279
+ if (value == NULL) { return Qnil; }
280
+
281
+ rb_value = NOKOGIRI_STR_NEW2(value);
282
+ xmlFree(value);
283
+ return rb_value;
284
+ }
285
+
286
+ /*
287
+ * call-seq:
288
+ * attribute_count
289
+ *
290
+ * Get the number of attributes for the current node
291
+ */
292
+ static VALUE
293
+ attribute_count(VALUE self)
294
+ {
295
+ xmlTextReaderPtr reader;
296
+ int count;
297
+
298
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
299
+ count = xmlTextReaderAttributeCount(reader);
300
+ if (count == -1) { return Qnil; }
301
+
302
+ return INT2NUM(count);
303
+ }
304
+
305
+ /*
306
+ * call-seq:
307
+ * depth
308
+ *
309
+ * Get the depth of the node
310
+ */
311
+ static VALUE
312
+ depth(VALUE self)
313
+ {
314
+ xmlTextReaderPtr reader;
315
+ int depth;
316
+
317
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
318
+ depth = xmlTextReaderDepth(reader);
319
+ if (depth == -1) { return Qnil; }
320
+
321
+ return INT2NUM(depth);
322
+ }
323
+
324
+ /*
325
+ * call-seq:
326
+ * xml_version
327
+ *
328
+ * Get the XML version of the document being read
329
+ */
330
+ static VALUE
331
+ xml_version(VALUE self)
332
+ {
333
+ xmlTextReaderPtr reader;
334
+ const char *version;
335
+
336
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
337
+ version = (const char *)xmlTextReaderConstXmlVersion(reader);
338
+ if (version == NULL) { return Qnil; }
339
+
340
+ return NOKOGIRI_STR_NEW2(version);
341
+ }
342
+
343
+ /*
344
+ * call-seq:
345
+ * lang
346
+ *
347
+ * Get the xml:lang scope within which the node resides.
348
+ */
349
+ static VALUE
350
+ lang(VALUE self)
351
+ {
352
+ xmlTextReaderPtr reader;
353
+ const char *lang;
354
+
355
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
356
+ lang = (const char *)xmlTextReaderConstXmlLang(reader);
357
+ if (lang == NULL) { return Qnil; }
358
+
359
+ return NOKOGIRI_STR_NEW2(lang);
360
+ }
361
+
362
+ /*
363
+ * call-seq:
364
+ * value
365
+ *
366
+ * Get the text value of the node if present. Returns a utf-8 encoded string.
367
+ */
368
+ static VALUE
369
+ value(VALUE self)
370
+ {
371
+ xmlTextReaderPtr reader;
372
+ const char *value;
373
+
374
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
375
+ value = (const char *)xmlTextReaderConstValue(reader);
376
+ if (value == NULL) { return Qnil; }
377
+
378
+ return NOKOGIRI_STR_NEW2(value);
379
+ }
380
+
381
+ /*
382
+ * call-seq:
383
+ * prefix
384
+ *
385
+ * Get the shorthand reference to the namespace associated with the node.
386
+ */
387
+ static VALUE
388
+ prefix(VALUE self)
389
+ {
390
+ xmlTextReaderPtr reader;
391
+ const char *prefix;
392
+
393
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
394
+ prefix = (const char *)xmlTextReaderConstPrefix(reader);
395
+ if (prefix == NULL) { return Qnil; }
396
+
397
+ return NOKOGIRI_STR_NEW2(prefix);
398
+ }
399
+
400
+ /*
401
+ * call-seq:
402
+ * namespace_uri
403
+ *
404
+ * Get the URI defining the namespace associated with the node
405
+ */
406
+ static VALUE
407
+ namespace_uri(VALUE self)
408
+ {
409
+ xmlTextReaderPtr reader;
410
+ const char *uri;
411
+
412
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
413
+ uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
414
+ if (uri == NULL) { return Qnil; }
415
+
416
+ return NOKOGIRI_STR_NEW2(uri);
417
+ }
418
+
419
+ /*
420
+ * call-seq:
421
+ * local_name
422
+ *
423
+ * Get the local name of the node
424
+ */
425
+ static VALUE
426
+ local_name(VALUE self)
427
+ {
428
+ xmlTextReaderPtr reader;
429
+ const char *name;
430
+
431
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
432
+ name = (const char *)xmlTextReaderConstLocalName(reader);
433
+ if (name == NULL) { return Qnil; }
434
+
435
+ return NOKOGIRI_STR_NEW2(name);
436
+ }
437
+
438
+ /*
439
+ * call-seq:
440
+ * name
441
+ *
442
+ * Get the name of the node. Returns a utf-8 encoded string.
443
+ */
444
+ static VALUE
445
+ name(VALUE self)
446
+ {
447
+ xmlTextReaderPtr reader;
448
+ const char *name;
449
+
450
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
451
+ name = (const char *)xmlTextReaderConstName(reader);
452
+ if (name == NULL) { return Qnil; }
453
+
454
+ return NOKOGIRI_STR_NEW2(name);
455
+ }
456
+
457
+ /*
458
+ * call-seq:
459
+ * base_uri
460
+ *
461
+ * Get the xml:base of the node
462
+ */
463
+ static VALUE
464
+ rb_xml_reader_base_uri(VALUE rb_reader)
465
+ {
466
+ VALUE rb_base_uri;
467
+ xmlTextReaderPtr c_reader;
468
+ xmlChar *c_base_uri;
469
+
470
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
471
+
472
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
473
+ if (c_base_uri == NULL) {
474
+ return Qnil;
475
+ }
476
+
477
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
478
+ xmlFree(c_base_uri);
479
+
480
+ return rb_base_uri;
481
+ }
482
+
483
+ /*
484
+ * call-seq:
485
+ * state
486
+ *
487
+ * Get the state of the reader
488
+ */
489
+ static VALUE
490
+ state(VALUE self)
491
+ {
492
+ xmlTextReaderPtr reader;
493
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
494
+ return INT2NUM(xmlTextReaderReadState(reader));
495
+ }
496
+
497
+ /*
498
+ * call-seq:
499
+ * node_type
500
+ *
501
+ * Get the type of readers current node
502
+ */
503
+ static VALUE
504
+ node_type(VALUE self)
505
+ {
506
+ xmlTextReaderPtr reader;
507
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
508
+ return INT2NUM(xmlTextReaderNodeType(reader));
509
+ }
510
+
511
+ /*
512
+ * call-seq:
513
+ * read
514
+ *
515
+ * Move the Reader forward through the XML document.
516
+ */
517
+ static VALUE
518
+ read_more(VALUE rb_reader)
519
+ {
520
+ xmlTextReaderPtr c_reader;
521
+ libxmlStructuredErrorHandlerState handler_state;
522
+
523
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
524
+
525
+ VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
526
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
527
+
528
+ int status = xmlTextReaderRead(c_reader);
529
+
530
+ noko__structured_error_func_restore(&handler_state);
531
+
532
+ xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
533
+ if (c_document && c_document->encoding == NULL) {
534
+ VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
535
+ if (RTEST(constructor_encoding)) {
536
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
537
+ } else {
538
+ rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
539
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
540
+ }
541
+ }
542
+
543
+ if (status == 1) { return rb_reader; }
544
+ if (status == 0) { return Qnil; }
545
+
546
+ /* if we're here, there was an error */
547
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
548
+ if (RB_TEST(exception)) {
549
+ rb_exc_raise(exception);
550
+ } else {
551
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
552
+ }
553
+ }
554
+
555
+ /*
556
+ * call-seq:
557
+ * inner_xml
558
+ *
559
+ * Read the contents of the current node, including child nodes and markup.
560
+ * Returns a utf-8 encoded string.
561
+ */
562
+ static VALUE
563
+ inner_xml(VALUE self)
564
+ {
565
+ xmlTextReaderPtr reader;
566
+ xmlChar *value;
567
+ VALUE str;
568
+
569
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
570
+
571
+ value = xmlTextReaderReadInnerXml(reader);
572
+
573
+ str = Qnil;
574
+ if (value) {
575
+ str = NOKOGIRI_STR_NEW2((char *)value);
576
+ xmlFree(value);
577
+ }
578
+
579
+ return str;
580
+ }
581
+
582
+ /*
583
+ * call-seq:
584
+ * outer_xml
585
+ *
586
+ * Read the current node and its contents, including child nodes and markup.
587
+ * Returns a utf-8 encoded string.
588
+ */
589
+ static VALUE
590
+ outer_xml(VALUE self)
591
+ {
592
+ xmlTextReaderPtr reader;
593
+ xmlChar *value;
594
+ VALUE str = Qnil;
595
+
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
597
+
598
+ value = xmlTextReaderReadOuterXml(reader);
599
+
600
+ if (value) {
601
+ str = NOKOGIRI_STR_NEW2((char *)value);
602
+ xmlFree(value);
603
+ }
604
+ return str;
605
+ }
606
+
607
+ /*
608
+ * call-seq:
609
+ * from_memory(string, url = nil, encoding = nil, options = 0)
610
+ *
611
+ * Create a new Reader to parse a String.
612
+ */
613
+ static VALUE
614
+ from_memory(int argc, VALUE *argv, VALUE klass)
615
+ {
616
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
617
+ * become private. */
618
+ VALUE rb_buffer, rb_url, encoding, rb_options;
619
+ xmlTextReaderPtr reader;
620
+ const char *c_url = NULL;
621
+ const char *c_encoding = NULL;
622
+ int c_options = 0;
623
+ VALUE rb_reader, args[3];
624
+
625
+ rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);
626
+
627
+ if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
628
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
629
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
630
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
631
+
632
+ reader = xmlReaderForMemory(
633
+ StringValuePtr(rb_buffer),
634
+ (int)RSTRING_LEN(rb_buffer),
635
+ c_url,
636
+ c_encoding,
637
+ c_options
638
+ );
639
+
640
+ if (reader == NULL) {
641
+ xmlFreeTextReader(reader);
642
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
643
+ }
644
+
645
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
646
+ args[0] = rb_buffer;
647
+ args[1] = rb_url;
648
+ args[2] = encoding;
649
+ rb_obj_call_init(rb_reader, 3, args);
650
+
651
+ return rb_reader;
652
+ }
653
+
654
+ /*
655
+ * call-seq:
656
+ * from_io(io, url = nil, encoding = nil, options = 0)
657
+ *
658
+ * Create a new Reader to parse an IO stream.
659
+ */
660
+ static VALUE
661
+ from_io(int argc, VALUE *argv, VALUE klass)
662
+ {
663
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
664
+ * become private. */
665
+ VALUE rb_io, rb_url, encoding, rb_options;
666
+ xmlTextReaderPtr reader;
667
+ const char *c_url = NULL;
668
+ const char *c_encoding = NULL;
669
+ int c_options = 0;
670
+ VALUE rb_reader, args[3];
671
+
672
+ rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);
673
+
674
+ if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
675
+ if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
676
+ if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
677
+ if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); }
678
+
679
+ reader = xmlReaderForIO(
680
+ (xmlInputReadCallback)noko_io_read,
681
+ (xmlInputCloseCallback)noko_io_close,
682
+ (void *)rb_io,
683
+ c_url,
684
+ c_encoding,
685
+ c_options
686
+ );
687
+
688
+ if (reader == NULL) {
689
+ xmlFreeTextReader(reader);
690
+ rb_raise(rb_eRuntimeError, "couldn't create a parser");
691
+ }
692
+
693
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
694
+ args[0] = rb_io;
695
+ args[1] = rb_url;
696
+ args[2] = encoding;
697
+ rb_obj_call_init(rb_reader, 3, args);
698
+
699
+ return rb_reader;
700
+ }
701
+
702
+ /*
703
+ * call-seq:
704
+ * reader.empty_element? # => true or false
705
+ *
706
+ * Returns true if the current node is empty, otherwise false.
707
+ */
708
+ static VALUE
709
+ empty_element_p(VALUE self)
710
+ {
711
+ xmlTextReaderPtr reader;
712
+
713
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
714
+
715
+ if (xmlTextReaderIsEmptyElement(reader)) {
716
+ return Qtrue;
717
+ }
718
+
719
+ return Qfalse;
720
+ }
721
+
722
+ static VALUE
723
+ rb_xml_reader_encoding(VALUE rb_reader)
724
+ {
725
+ xmlTextReaderPtr c_reader;
726
+ const char *parser_encoding;
727
+ VALUE constructor_encoding;
728
+
729
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
730
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
731
+ if (parser_encoding) {
732
+ return NOKOGIRI_STR_NEW2(parser_encoding);
733
+ }
734
+
735
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
736
+ if (RTEST(constructor_encoding)) {
737
+ return constructor_encoding;
738
+ }
739
+
740
+ return Qnil;
741
+ }
742
+
743
+ void
744
+ noko_init_xml_reader(void)
745
+ {
746
+ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
747
+
748
+ rb_undef_alloc_func(cNokogiriXmlReader);
749
+
750
+ rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
751
+ rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
752
+
753
+ rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
754
+ rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
755
+ rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
756
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
757
+ rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
758
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
759
+ rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
760
+ rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
761
+ rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
762
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
763
+ rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
764
+ rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
765
+ rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
766
+ rb_define_method(cNokogiriXmlReader, "name", name, 0);
767
+ rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
768
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
769
+ rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
770
+ rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
771
+ rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
772
+ rb_define_method(cNokogiriXmlReader, "read", read_more, 0);
773
+ rb_define_method(cNokogiriXmlReader, "state", state, 0);
774
+ rb_define_method(cNokogiriXmlReader, "value", value, 0);
775
+ rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0);
776
+ rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0);
777
+ }