nokogiri 1.11.0.rc2 → 1.11.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/nokogiri/depend +37 -358
  7. data/ext/nokogiri/extconf.rb +581 -374
  8. data/ext/nokogiri/html_document.c +78 -82
  9. data/ext/nokogiri/html_element_description.c +84 -71
  10. data/ext/nokogiri/html_entity_lookup.c +21 -16
  11. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  12. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  13. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  14. data/ext/nokogiri/nokogiri.c +192 -93
  15. data/ext/nokogiri/nokogiri.h +177 -98
  16. data/ext/nokogiri/test_global_handlers.c +40 -0
  17. data/ext/nokogiri/xml_attr.c +15 -15
  18. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  19. data/ext/nokogiri/xml_cdata.c +13 -18
  20. data/ext/nokogiri/xml_comment.c +19 -26
  21. data/ext/nokogiri/xml_document.c +225 -163
  22. data/ext/nokogiri/xml_document_fragment.c +13 -15
  23. data/ext/nokogiri/xml_dtd.c +54 -48
  24. data/ext/nokogiri/xml_element_content.c +30 -27
  25. data/ext/nokogiri/xml_element_decl.c +22 -22
  26. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  27. data/ext/nokogiri/xml_entity_decl.c +32 -30
  28. data/ext/nokogiri/xml_entity_reference.c +16 -18
  29. data/ext/nokogiri/xml_namespace.c +56 -49
  30. data/ext/nokogiri/xml_node.c +338 -286
  31. data/ext/nokogiri/xml_node_set.c +168 -156
  32. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  33. data/ext/nokogiri/xml_reader.c +191 -157
  34. data/ext/nokogiri/xml_relax_ng.c +52 -28
  35. data/ext/nokogiri/xml_sax_parser.c +118 -118
  36. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  37. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  38. data/ext/nokogiri/xml_schema.c +95 -47
  39. data/ext/nokogiri/xml_syntax_error.c +42 -21
  40. data/ext/nokogiri/xml_text.c +13 -17
  41. data/ext/nokogiri/xml_xpath_context.c +206 -123
  42. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  43. data/lib/nokogiri.rb +4 -8
  44. data/lib/nokogiri/css/parser.rb +62 -62
  45. data/lib/nokogiri/css/parser.y +2 -2
  46. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  47. data/lib/nokogiri/extension.rb +26 -0
  48. data/lib/nokogiri/html/document.rb +12 -26
  49. data/lib/nokogiri/html/document_fragment.rb +15 -15
  50. data/lib/nokogiri/version.rb +2 -148
  51. data/lib/nokogiri/version/constant.rb +5 -0
  52. data/lib/nokogiri/version/info.rb +205 -0
  53. data/lib/nokogiri/xml/builder.rb +2 -2
  54. data/lib/nokogiri/xml/document.rb +48 -18
  55. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  56. data/lib/nokogiri/xml/node.rb +88 -69
  57. data/lib/nokogiri/xml/parse_options.rb +6 -0
  58. data/lib/nokogiri/xml/reader.rb +2 -9
  59. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  60. data/lib/nokogiri/xml/schema.rb +12 -4
  61. data/lib/nokogiri/xml/searchable.rb +3 -1
  62. data/lib/nokogiri/xml/xpath.rb +1 -3
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
  65. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
  66. data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
  67. data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
  68. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
  69. metadata +85 -155
  70. data/ext/nokogiri/html_document.h +0 -10
  71. data/ext/nokogiri/html_element_description.h +0 -10
  72. data/ext/nokogiri/html_entity_lookup.h +0 -8
  73. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  74. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  75. data/ext/nokogiri/xml_attr.h +0 -9
  76. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  77. data/ext/nokogiri/xml_cdata.h +0 -9
  78. data/ext/nokogiri/xml_comment.h +0 -9
  79. data/ext/nokogiri/xml_document.h +0 -23
  80. data/ext/nokogiri/xml_document_fragment.h +0 -10
  81. data/ext/nokogiri/xml_dtd.h +0 -10
  82. data/ext/nokogiri/xml_element_content.h +0 -10
  83. data/ext/nokogiri/xml_element_decl.h +0 -9
  84. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  85. data/ext/nokogiri/xml_entity_decl.h +0 -10
  86. data/ext/nokogiri/xml_entity_reference.h +0 -9
  87. data/ext/nokogiri/xml_io.c +0 -61
  88. data/ext/nokogiri/xml_io.h +0 -11
  89. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  90. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  91. data/ext/nokogiri/xml_namespace.h +0 -14
  92. data/ext/nokogiri/xml_node.h +0 -13
  93. data/ext/nokogiri/xml_node_set.h +0 -12
  94. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  95. data/ext/nokogiri/xml_reader.h +0 -10
  96. data/ext/nokogiri/xml_relax_ng.h +0 -9
  97. data/ext/nokogiri/xml_sax_parser.h +0 -39
  98. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  99. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  100. data/ext/nokogiri/xml_schema.h +0 -9
  101. data/ext/nokogiri/xml_syntax_error.h +0 -13
  102. data/ext/nokogiri/xml_text.h +0 -9
  103. data/ext/nokogiri/xml_xpath_context.h +0 -10
  104. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,6 +1,9 @@
1
- #include <xml_schema.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void dealloc(xmlSchemaPtr schema)
3
+ VALUE cNokogiriXmlSchema;
4
+
5
+ static void
6
+ dealloc(xmlSchemaPtr schema)
4
7
  {
5
8
  NOKOGIRI_DEBUG_START(schema);
6
9
  xmlSchemaFree(schema);
@@ -13,7 +16,8 @@ static void dealloc(xmlSchemaPtr schema)
13
16
  *
14
17
  * Validate a Nokogiri::XML::Document against this Schema.
15
18
  */
16
- static VALUE validate_document(VALUE self, VALUE document)
19
+ static VALUE
20
+ validate_document(VALUE self, VALUE document)
17
21
  {
18
22
  xmlDocPtr doc;
19
23
  xmlSchemaPtr schema;
@@ -27,7 +31,7 @@ static VALUE validate_document(VALUE self, VALUE document)
27
31
 
28
32
  valid_ctxt = xmlSchemaNewValidCtxt(schema);
29
33
 
30
- if(NULL == valid_ctxt) {
34
+ if (NULL == valid_ctxt) {
31
35
  /* we have a problem */
32
36
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
33
37
  }
@@ -53,7 +57,8 @@ static VALUE validate_document(VALUE self, VALUE document)
53
57
  *
54
58
  * Validate a file against this Schema.
55
59
  */
56
- static VALUE validate_file(VALUE self, VALUE rb_filename)
60
+ static VALUE
61
+ validate_file(VALUE self, VALUE rb_filename)
57
62
  {
58
63
  xmlSchemaPtr schema;
59
64
  xmlSchemaValidCtxtPtr valid_ctxt;
@@ -61,13 +66,13 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
61
66
  VALUE errors;
62
67
 
63
68
  Data_Get_Struct(self, xmlSchema, schema);
64
- filename = (const char*)StringValueCStr(rb_filename) ;
69
+ filename = (const char *)StringValueCStr(rb_filename) ;
65
70
 
66
71
  errors = rb_ary_new();
67
72
 
68
73
  valid_ctxt = xmlSchemaNewValidCtxt(schema);
69
74
 
70
- if(NULL == valid_ctxt) {
75
+ if (NULL == valid_ctxt) {
71
76
  /* we have a problem */
72
77
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
73
78
  }
@@ -93,15 +98,28 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
93
98
  *
94
99
  * Create a new Schema from the contents of +string+
95
100
  */
96
- static VALUE read_memory(VALUE klass, VALUE content)
101
+ static VALUE
102
+ read_memory(int argc, VALUE *argv, VALUE klass)
97
103
  {
104
+ VALUE content;
105
+ VALUE parse_options;
106
+ int parse_options_int;
107
+ xmlSchemaParserCtxtPtr ctx;
98
108
  xmlSchemaPtr schema;
99
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
100
- (const char *)StringValuePtr(content),
101
- (int)RSTRING_LEN(content)
102
- );
109
+ VALUE errors;
103
110
  VALUE rb_schema;
104
- VALUE errors = rb_ary_new();
111
+ int scanned_args = 0;
112
+ xmlExternalEntityLoader old_loader = 0;
113
+
114
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
115
+ if (scanned_args == 1) {
116
+ parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
117
+ }
118
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
119
+
120
+ ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
121
+
122
+ errors = rb_ary_new();
105
123
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
106
124
 
107
125
  #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
@@ -112,23 +130,34 @@ static VALUE read_memory(VALUE klass, VALUE content)
112
130
  );
113
131
  #endif
114
132
 
115
- schema = xmlSchemaParse(ctx);
133
+ if (parse_options_int & XML_PARSE_NONET) {
134
+ old_loader = xmlGetExternalEntityLoader();
135
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
136
+ }
137
+
138
+ schema = xmlSchemaParse(ctx);
139
+
140
+ if (old_loader) {
141
+ xmlSetExternalEntityLoader(old_loader);
142
+ }
116
143
 
117
144
  xmlSetStructuredErrorFunc(NULL, NULL);
118
145
  xmlSchemaFreeParserCtxt(ctx);
119
146
 
120
- if(NULL == schema) {
147
+ if (NULL == schema) {
121
148
  xmlErrorPtr error = xmlGetLastError();
122
- if(error)
149
+ if (error) {
123
150
  Nokogiri_error_raise(NULL, error);
124
- else
151
+ } else {
125
152
  rb_raise(rb_eRuntimeError, "Could not parse document");
153
+ }
126
154
 
127
155
  return Qnil;
128
156
  }
129
157
 
130
158
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
131
159
  rb_iv_set(rb_schema, "@errors", errors);
160
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
132
161
 
133
162
  return rb_schema;
134
163
  }
@@ -138,24 +167,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
138
167
  * out from under the VALUE pointer. This function checks to see if any of
139
168
  * those nodes have been exposed to Ruby, and if so we should raise an exception.
140
169
  */
141
- static int has_blank_nodes_p(VALUE cache)
170
+ static int
171
+ has_blank_nodes_p(VALUE cache)
142
172
  {
143
- long i;
173
+ long i;
144
174
 
145
- if (NIL_P(cache)) {
146
- return 0;
147
- }
175
+ if (NIL_P(cache)) {
176
+ return 0;
177
+ }
148
178
 
149
- for (i = 0; i < RARRAY_LEN(cache); i++) {
150
- xmlNodePtr node;
151
- VALUE element = rb_ary_entry(cache, i);
152
- Data_Get_Struct(element, xmlNode, node);
153
- if (xmlIsBlankNode(node)) {
154
- return 1;
155
- }
179
+ for (i = 0; i < RARRAY_LEN(cache); i++) {
180
+ xmlNodePtr node;
181
+ VALUE element = rb_ary_entry(cache, i);
182
+ Data_Get_Struct(element, xmlNode, node);
183
+ if (xmlIsBlankNode(node)) {
184
+ return 1;
156
185
  }
186
+ }
157
187
 
158
- return 0;
188
+ return 0;
159
189
  }
160
190
 
161
191
  /*
@@ -164,18 +194,29 @@ static int has_blank_nodes_p(VALUE cache)
164
194
  *
165
195
  * Create a new Schema from the Nokogiri::XML::Document +doc+
166
196
  */
167
- static VALUE from_document(VALUE klass, VALUE document)
197
+ static VALUE
198
+ from_document(int argc, VALUE *argv, VALUE klass)
168
199
  {
200
+ VALUE document;
201
+ VALUE parse_options;
202
+ int parse_options_int;
169
203
  xmlDocPtr doc;
170
204
  xmlSchemaParserCtxtPtr ctx;
171
205
  xmlSchemaPtr schema;
172
206
  VALUE errors;
173
207
  VALUE rb_schema;
208
+ int scanned_args = 0;
209
+ xmlExternalEntityLoader old_loader = 0;
210
+
211
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
174
212
 
175
213
  Data_Get_Struct(document, xmlDoc, doc);
214
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
176
215
 
177
- /* In case someone passes us a node. ugh. */
178
- doc = doc->doc;
216
+ if (scanned_args == 1) {
217
+ parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
218
+ }
219
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
179
220
 
180
221
  if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
181
222
  rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
@@ -194,41 +235,48 @@ static VALUE from_document(VALUE klass, VALUE document)
194
235
  );
195
236
  #endif
196
237
 
238
+ if (parse_options_int & XML_PARSE_NONET) {
239
+ old_loader = xmlGetExternalEntityLoader();
240
+ xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
241
+ }
242
+
197
243
  schema = xmlSchemaParse(ctx);
198
244
 
245
+ if (old_loader) {
246
+ xmlSetExternalEntityLoader(old_loader);
247
+ }
248
+
199
249
  xmlSetStructuredErrorFunc(NULL, NULL);
200
250
  xmlSchemaFreeParserCtxt(ctx);
201
251
 
202
- if(NULL == schema) {
252
+ if (NULL == schema) {
203
253
  xmlErrorPtr error = xmlGetLastError();
204
- if(error)
254
+ if (error) {
205
255
  Nokogiri_error_raise(NULL, error);
206
- else
256
+ } else {
207
257
  rb_raise(rb_eRuntimeError, "Could not parse document");
258
+ }
208
259
 
209
260
  return Qnil;
210
261
  }
211
262
 
212
263
  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
213
264
  rb_iv_set(rb_schema, "@errors", errors);
265
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
214
266
 
215
267
  return rb_schema;
216
268
 
217
269
  return Qnil;
218
270
  }
219
271
 
220
- VALUE cNokogiriXmlSchema;
221
- void init_xml_schema()
272
+ void
273
+ noko_init_xml_schema()
222
274
  {
223
- VALUE nokogiri = rb_define_module("Nokogiri");
224
- VALUE xml = rb_define_module_under(nokogiri, "XML");
225
- VALUE klass = rb_define_class_under(xml, "Schema", rb_cObject);
226
-
227
- cNokogiriXmlSchema = klass;
275
+ cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
228
276
 
229
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
230
- rb_define_singleton_method(klass, "from_document", from_document, 1);
277
+ rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
278
+ rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
231
279
 
232
- rb_define_private_method(klass, "validate_document", validate_document, 1);
233
- rb_define_private_method(klass, "validate_file", validate_file, 1);
280
+ rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
281
+ rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
234
282
  }
@@ -1,38 +1,64 @@
1
- #include <xml_syntax_error.h>
1
+ #include <nokogiri.h>
2
2
 
3
- void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
3
+ VALUE cNokogiriXmlSyntaxError;
4
+
5
+ void
6
+ Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state)
7
+ {
8
+ /* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */
9
+ handler_state->user_data = xmlStructuredErrorContext;
10
+ handler_state->handler = xmlStructuredError;
11
+ }
12
+
13
+ void
14
+ Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
15
+ void *user_data,
16
+ xmlStructuredErrorFunc handler)
17
+ {
18
+ Nokogiri_structured_error_func_save(handler_state);
19
+ xmlSetStructuredErrorFunc(user_data, handler);
20
+ }
21
+
22
+ void
23
+ Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state)
24
+ {
25
+ xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler);
26
+ }
27
+
28
+ void
29
+ Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
4
30
  {
5
31
  VALUE list = (VALUE)ctx;
6
32
  Check_Type(list, T_ARRAY);
7
33
  rb_ary_push(list, Nokogiri_wrap_xml_syntax_error(error));
8
34
  }
9
35
 
10
- void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)
36
+ void
37
+ Nokogiri_error_raise(void *ctx, xmlErrorPtr error)
11
38
  {
12
39
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
13
40
  }
14
41
 
15
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
42
+ VALUE
43
+ Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
16
44
  {
17
45
  VALUE msg, e, klass;
18
46
 
19
47
  klass = cNokogiriXmlSyntaxError;
20
48
 
21
49
  if (error && error->domain == XML_FROM_XPATH) {
22
- VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
23
- klass = rb_const_get(xpath, rb_intern("SyntaxError"));
50
+ klass = cNokogiriXmlXpathSyntaxError;
24
51
  }
25
52
 
26
53
  msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil;
27
54
 
28
55
  e = rb_class_new_instance(
29
- 1,
30
- &msg,
31
- klass
32
- );
56
+ 1,
57
+ &msg,
58
+ klass
59
+ );
33
60
 
34
- if (error)
35
- {
61
+ if (error) {
36
62
  rb_iv_set(e, "@domain", INT2NUM(error->domain));
37
63
  rb_iv_set(e, "@code", INT2NUM(error->code));
38
64
  rb_iv_set(e, "@level", INT2NUM((short)error->level));
@@ -48,17 +74,12 @@ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
48
74
  return e;
49
75
  }
50
76
 
51
- VALUE cNokogiriXmlSyntaxError;
52
- void init_xml_syntax_error()
77
+ void
78
+ noko_init_xml_syntax_error()
53
79
  {
54
- VALUE nokogiri = rb_define_module("Nokogiri");
55
- VALUE xml = rb_define_module_under(nokogiri, "XML");
56
-
80
+ assert(cNokogiriSyntaxError);
57
81
  /*
58
82
  * The XML::SyntaxError is raised on parse errors
59
83
  */
60
- VALUE syntax_error_mommy = rb_define_class_under(nokogiri, "SyntaxError", rb_eStandardError);
61
- VALUE klass = rb_define_class_under(xml, "SyntaxError", syntax_error_mommy);
62
- cNokogiriXmlSyntaxError = klass;
63
-
84
+ cNokogiriXmlSyntaxError = rb_define_class_under(mNokogiriXml, "SyntaxError", cNokogiriSyntaxError);
64
85
  }
@@ -1,4 +1,6 @@
1
- #include <xml_text.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlText ;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,7 +8,8 @@
6
8
  *
7
9
  * Create a new Text element on the +document+ with +content+
8
10
  */
9
- static VALUE new(int argc, VALUE *argv, VALUE klass)
11
+ static VALUE
12
+ new (int argc, VALUE *argv, VALUE klass)
10
13
  {
11
14
  xmlDocPtr doc;
12
15
  xmlNodePtr node;
@@ -22,31 +25,24 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
22
25
  node = xmlNewText((xmlChar *)StringValueCStr(string));
23
26
  node->doc = doc->doc;
24
27
 
25
- nokogiri_root_node(node);
28
+ noko_xml_document_pin_node(node);
26
29
 
27
- rb_node = Nokogiri_wrap_xml_node(klass, node) ;
30
+ rb_node = noko_xml_node_wrap(klass, node) ;
28
31
  rb_obj_call_init(rb_node, argc, argv);
29
32
 
30
- if(rb_block_given_p()) rb_yield(rb_node);
33
+ if (rb_block_given_p()) { rb_yield(rb_node); }
31
34
 
32
35
  return rb_node;
33
36
  }
34
37
 
35
- VALUE cNokogiriXmlText ;
36
- void init_xml_text()
38
+ void
39
+ noko_init_xml_text()
37
40
  {
38
- VALUE nokogiri = rb_define_module("Nokogiri");
39
- VALUE xml = rb_define_module_under(nokogiri, "XML");
40
- /* */
41
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
42
- VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
43
-
41
+ assert(cNokogiriXmlCharacterData);
44
42
  /*
45
43
  * Wraps Text nodes.
46
44
  */
47
- VALUE klass = rb_define_class_under(xml, "Text", char_data);
48
-
49
- cNokogiriXmlText = klass;
45
+ cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
50
46
 
51
- rb_define_singleton_method(klass, "new", new, -1);
47
+ rb_define_singleton_method(cNokogiriXmlText, "new", new, -1);
52
48
  }
@@ -1,29 +1,107 @@
1
- #include <xml_xpath_context.h>
1
+ #include <nokogiri.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
3
+ VALUE cNokogiriXmlXpathContext;
4
4
 
5
- static void deallocate(xmlXPathContextPtr ctx)
5
+ /*
6
+ * these constants have matching declarations in
7
+ * ext/java/nokogiri/internals/NokogiriNamespaceContext.java
8
+ */
9
+ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
10
+ static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
11
+
12
+ static void
13
+ deallocate(xmlXPathContextPtr ctx)
6
14
  {
7
15
  NOKOGIRI_DEBUG_START(ctx);
8
16
  xmlXPathFreeContext(ctx);
9
17
  NOKOGIRI_DEBUG_END(ctx);
10
18
  }
11
19
 
20
+ /* find a CSS class in an HTML element's `class` attribute */
21
+ static const xmlChar *
22
+ builtin_css_class(const xmlChar *str, const xmlChar *val)
23
+ {
24
+ int val_len;
25
+
26
+ if (str == NULL) { return (NULL); }
27
+ if (val == NULL) { return (NULL); }
28
+
29
+ val_len = xmlStrlen(val);
30
+ if (val_len == 0) { return (str); }
31
+
32
+ while (*str != 0) {
33
+ if ((*str == *val) && !xmlStrncmp(str, val, val_len)) {
34
+ const xmlChar *next_byte = str + val_len;
35
+
36
+ /* only match if the next byte is whitespace or end of string */
37
+ if ((*next_byte == 0) || (IS_BLANK_CH(*next_byte))) {
38
+ return ((const xmlChar *)str);
39
+ }
40
+ }
41
+
42
+ /* advance str to whitespace */
43
+ while ((*str != 0) && !IS_BLANK_CH(*str)) {
44
+ str++;
45
+ }
46
+
47
+ /* advance str to start of next word or end of string */
48
+ while ((*str != 0) && IS_BLANK_CH(*str)) {
49
+ str++;
50
+ }
51
+ }
52
+
53
+ return (NULL);
54
+ }
55
+
56
+ /* xmlXPathFunction to wrap builtin_css_class() */
57
+ static void
58
+ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
59
+ {
60
+ xmlXPathObjectPtr hay, needle;
61
+
62
+ CHECK_ARITY(2);
63
+
64
+ CAST_TO_STRING;
65
+ needle = valuePop(ctxt);
66
+ if ((needle == NULL) || (needle->type != XPATH_STRING)) {
67
+ xmlXPathFreeObject(needle);
68
+ XP_ERROR(XPATH_INVALID_TYPE);
69
+ }
70
+
71
+ CAST_TO_STRING;
72
+ hay = valuePop(ctxt);
73
+ if ((hay == NULL) || (hay->type != XPATH_STRING)) {
74
+ xmlXPathFreeObject(hay);
75
+ xmlXPathFreeObject(needle);
76
+ XP_ERROR(XPATH_INVALID_TYPE);
77
+ }
78
+
79
+ if (builtin_css_class(hay->stringval, needle->stringval)) {
80
+ valuePush(ctxt, xmlXPathNewBoolean(1));
81
+ } else {
82
+ valuePush(ctxt, xmlXPathNewBoolean(0));
83
+ }
84
+
85
+ xmlXPathFreeObject(hay);
86
+ xmlXPathFreeObject(needle);
87
+ }
88
+
12
89
  /*
13
90
  * call-seq:
14
91
  * register_ns(prefix, uri)
15
92
  *
16
93
  * Register the namespace with +prefix+ and +uri+.
17
94
  */
18
- static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri)
95
+ static VALUE
96
+ register_ns(VALUE self, VALUE prefix, VALUE uri)
19
97
  {
20
98
  xmlXPathContextPtr ctx;
21
99
  Data_Get_Struct(self, xmlXPathContext, ctx);
22
100
 
23
- xmlXPathRegisterNs( ctx,
24
- (const xmlChar *)StringValueCStr(prefix),
25
- (const xmlChar *)StringValueCStr(uri)
26
- );
101
+ xmlXPathRegisterNs(ctx,
102
+ (const xmlChar *)StringValueCStr(prefix),
103
+ (const xmlChar *)StringValueCStr(uri)
104
+ );
27
105
  return self;
28
106
  }
29
107
 
@@ -33,23 +111,26 @@ static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri)
33
111
  *
34
112
  * Register the variable +name+ with +value+.
35
113
  */
36
- static VALUE register_variable(VALUE self, VALUE name, VALUE value)
114
+ static VALUE
115
+ register_variable(VALUE self, VALUE name, VALUE value)
37
116
  {
38
- xmlXPathContextPtr ctx;
39
- xmlXPathObjectPtr xmlValue;
40
- Data_Get_Struct(self, xmlXPathContext, ctx);
117
+ xmlXPathContextPtr ctx;
118
+ xmlXPathObjectPtr xmlValue;
119
+ Data_Get_Struct(self, xmlXPathContext, ctx);
41
120
 
42
- xmlValue = xmlXPathNewCString(StringValueCStr(value));
121
+ xmlValue = xmlXPathNewCString(StringValueCStr(value));
43
122
 
44
- xmlXPathRegisterVariable( ctx,
45
- (const xmlChar *)StringValueCStr(name),
46
- xmlValue
47
- );
123
+ xmlXPathRegisterVariable(ctx,
124
+ (const xmlChar *)StringValueCStr(name),
125
+ xmlValue
126
+ );
48
127
 
49
- return self;
128
+ return self;
50
129
  }
51
130
 
52
- void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name)
131
+ void
132
+ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
133
+ const char *function_name)
53
134
  {
54
135
  int i;
55
136
  VALUE result, doc;
@@ -72,76 +153,76 @@ void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr c
72
153
  i = nargs - 1;
73
154
  do {
74
155
  obj = valuePop(ctx);
75
- switch(obj->type) {
76
- case XPATH_STRING:
77
- argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
78
- break;
79
- case XPATH_BOOLEAN:
80
- argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
81
- break;
82
- case XPATH_NUMBER:
83
- argv[i] = rb_float_new(obj->floatval);
84
- break;
85
- case XPATH_NODESET:
86
- argv[i] = Nokogiri_wrap_xml_node_set(obj->nodesetval, doc);
87
- break;
88
- default:
89
- argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
156
+ switch (obj->type) {
157
+ case XPATH_STRING:
158
+ argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
159
+ break;
160
+ case XPATH_BOOLEAN:
161
+ argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
162
+ break;
163
+ case XPATH_NUMBER:
164
+ argv[i] = rb_float_new(obj->floatval);
165
+ break;
166
+ case XPATH_NODESET:
167
+ argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
168
+ break;
169
+ default:
170
+ argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
90
171
  }
91
172
  xmlXPathFreeNodeSetList(obj);
92
- } while(i-- > 0);
173
+ } while (i-- > 0);
93
174
  }
94
175
 
95
- result = rb_funcall2(handler, rb_intern((const char*)function_name), nargs, argv);
176
+ result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
96
177
 
97
178
  for (i = 0 ; i < nargs ; ++i) {
98
179
  rb_gc_unregister_address(&argv[i]);
99
180
  }
100
181
  free(argv);
101
182
 
102
- switch(TYPE(result)) {
103
- case T_FLOAT:
104
- case T_BIGNUM:
105
- case T_FIXNUM:
106
- xmlXPathReturnNumber(ctx, NUM2DBL(result));
107
- break;
108
- case T_STRING:
109
- xmlXPathReturnString(
110
- ctx,
111
- xmlCharStrdup(StringValueCStr(result))
112
- );
113
- break;
114
- case T_TRUE:
115
- xmlXPathReturnTrue(ctx);
116
- break;
117
- case T_FALSE:
118
- xmlXPathReturnFalse(ctx);
119
- break;
120
- case T_NIL:
121
- break;
122
- case T_ARRAY:
123
- {
124
- VALUE args[2];
125
- args[0] = doc;
126
- args[1] = result;
127
- node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
128
- Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
129
- xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
130
- }
183
+ switch (TYPE(result)) {
184
+ case T_FLOAT:
185
+ case T_BIGNUM:
186
+ case T_FIXNUM:
187
+ xmlXPathReturnNumber(ctx, NUM2DBL(result));
131
188
  break;
132
- case T_DATA:
133
- if(rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
134
- Data_Get_Struct(result, xmlNodeSet, xml_node_set);
135
- /* Copy the node set, otherwise it will get GC'd. */
136
- xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
137
- break;
138
- }
139
- default:
140
- rb_raise(rb_eRuntimeError, "Invalid return type");
189
+ case T_STRING:
190
+ xmlXPathReturnString(
191
+ ctx,
192
+ xmlCharStrdup(StringValueCStr(result))
193
+ );
194
+ break;
195
+ case T_TRUE:
196
+ xmlXPathReturnTrue(ctx);
197
+ break;
198
+ case T_FALSE:
199
+ xmlXPathReturnFalse(ctx);
200
+ break;
201
+ case T_NIL:
202
+ break;
203
+ case T_ARRAY: {
204
+ VALUE args[2];
205
+ args[0] = doc;
206
+ args[1] = result;
207
+ node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
208
+ Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
209
+ xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
210
+ }
211
+ break;
212
+ case T_DATA:
213
+ if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
214
+ Data_Get_Struct(result, xmlNodeSet, xml_node_set);
215
+ /* Copy the node set, otherwise it will get GC'd. */
216
+ xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
217
+ break;
141
218
  }
219
+ default:
220
+ rb_raise(rb_eRuntimeError, "Invalid return type");
221
+ }
142
222
  }
143
223
 
144
- static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
224
+ static void
225
+ ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
145
226
  {
146
227
  VALUE handler = Qnil;
147
228
  const char *function = NULL ;
@@ -152,26 +233,29 @@ static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
152
233
  assert(ctx->context->function);
153
234
 
154
235
  handler = (VALUE)(ctx->context->userData);
155
- function = (const char*)(ctx->context->function);
236
+ function = (const char *)(ctx->context->function);
156
237
 
157
238
  Nokogiri_marshal_xpath_funcall_and_return_values(ctx, nargs, handler, function);
158
239
  }
159
240
 
160
- static xmlXPathFunction lookup( void *ctx,
161
- const xmlChar * name,
162
- const xmlChar* ns_uri )
241
+ static xmlXPathFunction
242
+ lookup(void *ctx,
243
+ const xmlChar *name,
244
+ const xmlChar *ns_uri)
163
245
  {
164
246
  VALUE xpath_handler = (VALUE)ctx;
165
- if(rb_respond_to(xpath_handler, rb_intern((const char *)name)))
247
+ if (rb_respond_to(xpath_handler, rb_intern((const char *)name))) {
166
248
  return ruby_funcall;
249
+ }
167
250
 
168
251
  return NULL;
169
252
  }
170
253
 
171
- NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...));
172
- static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)
254
+ NORETURN(static void xpath_generic_exception_handler(void *ctx, const char *msg, ...));
255
+ static void
256
+ xpath_generic_exception_handler(void *ctx, const char *msg, ...)
173
257
  {
174
- char * message;
258
+ char *message;
175
259
 
176
260
  va_list args;
177
261
  va_start(args, msg);
@@ -187,7 +271,8 @@ static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)
187
271
  *
188
272
  * Evaluate the +search_path+ returning an XML::XPath object.
189
273
  */
190
- static VALUE evaluate(int argc, VALUE *argv, VALUE self)
274
+ static VALUE
275
+ evaluate(int argc, VALUE *argv, VALUE self)
191
276
  {
192
277
  VALUE search_path, xpath_handler;
193
278
  VALUE thing = Qnil;
@@ -197,12 +282,13 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
197
282
 
198
283
  Data_Get_Struct(self, xmlXPathContext, ctx);
199
284
 
200
- if(rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1)
285
+ if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
201
286
  xpath_handler = Qnil;
287
+ }
202
288
 
203
289
  query = (xmlChar *)StringValueCStr(search_path);
204
290
 
205
- if(Qnil != xpath_handler) {
291
+ if (Qnil != xpath_handler) {
206
292
  /* FIXME: not sure if this is the correct place to shove private data. */
207
293
  ctx->userData = (void *)xpath_handler;
208
294
  xmlXPathRegisterFuncLookup(ctx, lookup, (void *)xpath_handler);
@@ -219,7 +305,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
219
305
  xmlSetStructuredErrorFunc(NULL, NULL);
220
306
  xmlSetGenericErrorFunc(NULL, NULL);
221
307
 
222
- if(xpath == NULL) {
308
+ if (xpath == NULL) {
223
309
  xmlErrorPtr error = xmlGetLastError();
224
310
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
225
311
  }
@@ -227,23 +313,23 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
227
313
  assert(ctx->doc);
228
314
  assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
229
315
 
230
- switch(xpath->type) {
231
- case XPATH_STRING:
232
- thing = NOKOGIRI_STR_NEW2(xpath->stringval);
233
- xmlFree(xpath->stringval);
234
- break;
235
- case XPATH_NODESET:
236
- thing = Nokogiri_wrap_xml_node_set(xpath->nodesetval,
237
- DOC_RUBY_OBJECT(ctx->doc));
238
- break;
239
- case XPATH_NUMBER:
240
- thing = rb_float_new(xpath->floatval);
241
- break;
242
- case XPATH_BOOLEAN:
243
- thing = xpath->boolval == 1 ? Qtrue : Qfalse;
244
- break;
245
- default:
246
- thing = Nokogiri_wrap_xml_node_set(NULL, DOC_RUBY_OBJECT(ctx->doc));
316
+ switch (xpath->type) {
317
+ case XPATH_STRING:
318
+ thing = NOKOGIRI_STR_NEW2(xpath->stringval);
319
+ xmlFree(xpath->stringval);
320
+ break;
321
+ case XPATH_NODESET:
322
+ thing = noko_xml_node_set_wrap(xpath->nodesetval,
323
+ DOC_RUBY_OBJECT(ctx->doc));
324
+ break;
325
+ case XPATH_NUMBER:
326
+ thing = rb_float_new(xpath->floatval);
327
+ break;
328
+ case XPATH_BOOLEAN:
329
+ thing = xpath->boolval == 1 ? Qtrue : Qfalse;
330
+ break;
331
+ default:
332
+ thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
247
333
  }
248
334
 
249
335
  xmlXPathFreeNodeSetList(xpath);
@@ -257,42 +343,39 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
257
343
  *
258
344
  * Create a new XPathContext with +node+ as the reference point.
259
345
  */
260
- static VALUE new(VALUE klass, VALUE nodeobj)
346
+ static VALUE
347
+ new (VALUE klass, VALUE nodeobj)
261
348
  {
262
349
  xmlNodePtr node;
263
350
  xmlXPathContextPtr ctx;
264
351
  VALUE self;
265
352
 
266
- xmlXPathInit();
267
-
268
353
  Data_Get_Struct(nodeobj, xmlNode, node);
269
354
 
355
+ xmlXPathInit();
356
+
270
357
  ctx = xmlXPathNewContext(node->doc);
271
358
  ctx->node = node;
359
+
360
+ xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
361
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
362
+ xpath_builtin_css_class);
363
+
272
364
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
273
- /*rb_iv_set(self, "@xpath_handler", Qnil); */
274
365
  return self;
275
366
  }
276
367
 
277
- VALUE cNokogiriXmlXpathContext;
278
- void init_xml_xpath_context(void)
368
+ void
369
+ noko_init_xml_xpath_context(void)
279
370
  {
280
- VALUE module = rb_define_module("Nokogiri");
281
-
282
- /*
283
- * Nokogiri::XML
284
- */
285
- VALUE xml = rb_define_module_under(module, "XML");
286
-
287
371
  /*
288
372
  * XPathContext is the entry point for searching a Document by using XPath.
289
373
  */
290
- VALUE klass = rb_define_class_under(xml, "XPathContext", rb_cObject);
374
+ cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject);
291
375
 
292
- cNokogiriXmlXpathContext = klass;
376
+ rb_define_singleton_method(cNokogiriXmlXpathContext, "new", new, 1);
293
377
 
294
- rb_define_singleton_method(klass, "new", new, 1);
295
- rb_define_method(klass, "evaluate", evaluate, -1);
296
- rb_define_method(klass, "register_variable", register_variable, 2);
297
- rb_define_method(klass, "register_ns", register_ns, 2);
378
+ rb_define_method(cNokogiriXmlXpathContext, "evaluate", evaluate, -1);
379
+ rb_define_method(cNokogiriXmlXpathContext, "register_variable", register_variable, 2);
380
+ rb_define_method(cNokogiriXmlXpathContext, "register_ns", register_ns, 2);
298
381
  }