nokogiri 1.11.1 → 1.11.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +20 -15
  5. data/ext/nokogiri/depend +34 -474
  6. data/ext/nokogiri/extconf.rb +253 -183
  7. data/ext/nokogiri/html_document.c +10 -15
  8. data/ext/nokogiri/html_element_description.c +84 -71
  9. data/ext/nokogiri/html_entity_lookup.c +21 -16
  10. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  11. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  12. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  13. data/ext/nokogiri/nokogiri.c +171 -63
  14. data/ext/nokogiri/nokogiri.h +158 -75
  15. data/ext/nokogiri/test_global_handlers.c +3 -4
  16. data/ext/nokogiri/xml_attr.c +15 -15
  17. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  18. data/ext/nokogiri/xml_cdata.c +13 -18
  19. data/ext/nokogiri/xml_comment.c +19 -26
  20. data/ext/nokogiri/xml_document.c +221 -164
  21. data/ext/nokogiri/xml_document_fragment.c +13 -15
  22. data/ext/nokogiri/xml_dtd.c +54 -48
  23. data/ext/nokogiri/xml_element_content.c +30 -27
  24. data/ext/nokogiri/xml_element_decl.c +22 -22
  25. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  26. data/ext/nokogiri/xml_entity_decl.c +32 -30
  27. data/ext/nokogiri/xml_entity_reference.c +16 -18
  28. data/ext/nokogiri/xml_namespace.c +56 -49
  29. data/ext/nokogiri/xml_node.c +338 -286
  30. data/ext/nokogiri/xml_node_set.c +168 -156
  31. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  32. data/ext/nokogiri/xml_reader.c +191 -157
  33. data/ext/nokogiri/xml_relax_ng.c +29 -23
  34. data/ext/nokogiri/xml_sax_parser.c +117 -112
  35. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  36. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  37. data/ext/nokogiri/xml_schema.c +48 -42
  38. data/ext/nokogiri/xml_syntax_error.c +21 -23
  39. data/ext/nokogiri/xml_text.c +13 -17
  40. data/ext/nokogiri/xml_xpath_context.c +134 -127
  41. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  42. data/lib/nokogiri.rb +1 -22
  43. data/lib/nokogiri/css/parser.rb +1 -1
  44. data/lib/nokogiri/extension.rb +26 -0
  45. data/lib/nokogiri/html/document_fragment.rb +15 -15
  46. data/lib/nokogiri/version/constant.rb +1 -1
  47. data/lib/nokogiri/version/info.rb +31 -8
  48. data/lib/nokogiri/xml/document.rb +31 -11
  49. data/lib/nokogiri/xml/node.rb +38 -42
  50. data/lib/nokogiri/xml/reader.rb +2 -9
  51. data/lib/nokogiri/xml/xpath.rb +1 -3
  52. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  53. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
  54. metadata +8 -41
  55. data/ext/nokogiri/html_document.h +0 -10
  56. data/ext/nokogiri/html_element_description.h +0 -10
  57. data/ext/nokogiri/html_entity_lookup.h +0 -8
  58. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  59. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  60. data/ext/nokogiri/xml_attr.h +0 -9
  61. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  62. data/ext/nokogiri/xml_cdata.h +0 -9
  63. data/ext/nokogiri/xml_comment.h +0 -9
  64. data/ext/nokogiri/xml_document.h +0 -23
  65. data/ext/nokogiri/xml_document_fragment.h +0 -10
  66. data/ext/nokogiri/xml_dtd.h +0 -10
  67. data/ext/nokogiri/xml_element_content.h +0 -10
  68. data/ext/nokogiri/xml_element_decl.h +0 -9
  69. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  70. data/ext/nokogiri/xml_entity_decl.h +0 -10
  71. data/ext/nokogiri/xml_entity_reference.h +0 -9
  72. data/ext/nokogiri/xml_io.c +0 -63
  73. data/ext/nokogiri/xml_io.h +0 -11
  74. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  75. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  76. data/ext/nokogiri/xml_namespace.h +0 -14
  77. data/ext/nokogiri/xml_node.h +0 -13
  78. data/ext/nokogiri/xml_node_set.h +0 -12
  79. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  80. data/ext/nokogiri/xml_reader.h +0 -10
  81. data/ext/nokogiri/xml_relax_ng.h +0 -9
  82. data/ext/nokogiri/xml_sax_parser.h +0 -39
  83. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  84. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  85. data/ext/nokogiri/xml_schema.h +0 -9
  86. data/ext/nokogiri/xml_syntax_error.h +0 -25
  87. data/ext/nokogiri/xml_text.h +0 -9
  88. data/ext/nokogiri/xml_xpath_context.h +0 -10
  89. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,4 +1,6 @@
1
- #include <html_document.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtmlDocument ;
2
4
 
3
5
  static ID id_encoding_found;
4
6
  static ID id_to_s;
@@ -23,8 +25,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
23
25
  RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
24
26
  RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
25
27
  );
26
- rb_doc = Nokogiri_wrap_xml_document(klass, doc);
27
- rb_obj_call_init(rb_doc, argc, argv);
28
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
28
29
  return rb_doc ;
29
30
  }
30
31
 
@@ -47,7 +48,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
47
48
 
48
49
  xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
49
50
 
50
- c_doc = htmlReadIO(io_read_callback, io_close_callback, (void *)rb_io, c_url, c_encoding, options);
51
+ c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
51
52
 
52
53
  xmlSetStructuredErrorFunc(NULL, NULL);
53
54
 
@@ -81,7 +82,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
81
82
  return Qnil;
82
83
  }
83
84
 
84
- rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
85
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
85
86
  rb_iv_set(rb_doc, "@errors", rb_error_list);
86
87
  return rb_doc;
87
88
  }
@@ -129,7 +130,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
129
130
  return Qnil;
130
131
  }
131
132
 
132
- rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
133
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
133
134
  rb_iv_set(rb_doc, "@errors", rb_error_list);
134
135
  return rb_doc;
135
136
  }
@@ -148,17 +149,11 @@ rb_html_document_type(VALUE self)
148
149
  return INT2NUM((long)doc->type);
149
150
  }
150
151
 
151
- VALUE cNokogiriHtmlDocument ;
152
-
153
152
  void
154
- init_html_document()
153
+ noko_init_html_document()
155
154
  {
156
- VALUE nokogiri = rb_define_module("Nokogiri");
157
- VALUE nokogiri_xml = rb_define_module_under(nokogiri, "XML");
158
- VALUE nokogiri_xml_node = rb_define_class_under(nokogiri_xml, "Node", rb_cObject);
159
- VALUE nokogiri_xml_document = rb_define_class_under(nokogiri_xml, "Document", nokogiri_xml_node);
160
- VALUE nokogiri_html = rb_define_module_under(nokogiri, "HTML");
161
- cNokogiriHtmlDocument = rb_define_class_under(nokogiri_html, "Document", nokogiri_xml_document);
155
+ assert(cNokogiriXmlDocument);
156
+ cNokogiriHtmlDocument = rb_define_class_under(mNokogiriHtml, "Document", cNokogiriXmlDocument);
162
157
 
163
158
  rb_define_singleton_method(cNokogiriHtmlDocument, "read_memory", rb_html_document_s_read_memory, 4);
164
159
  rb_define_singleton_method(cNokogiriHtmlDocument, "read_io", rb_html_document_s_read_io, 4);
@@ -1,4 +1,6 @@
1
- #include <html_element_description.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtmlElementDescription ;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,9 +8,10 @@
6
8
  *
7
9
  * A list of required attributes for this element
8
10
  */
9
- static VALUE required_attributes(VALUE self)
11
+ static VALUE
12
+ required_attributes(VALUE self)
10
13
  {
11
- const htmlElemDesc * description;
14
+ const htmlElemDesc *description;
12
15
  VALUE list;
13
16
  int i;
14
17
 
@@ -16,9 +19,9 @@ static VALUE required_attributes(VALUE self)
16
19
 
17
20
  list = rb_ary_new();
18
21
 
19
- if(NULL == description->attrs_req) return list;
22
+ if (NULL == description->attrs_req) { return list; }
20
23
 
21
- for(i = 0; description->attrs_depr[i]; i++) {
24
+ for (i = 0; description->attrs_depr[i]; i++) {
22
25
  rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
23
26
  }
24
27
 
@@ -31,9 +34,10 @@ static VALUE required_attributes(VALUE self)
31
34
  *
32
35
  * A list of deprecated attributes for this element
33
36
  */
34
- static VALUE deprecated_attributes(VALUE self)
37
+ static VALUE
38
+ deprecated_attributes(VALUE self)
35
39
  {
36
- const htmlElemDesc * description;
40
+ const htmlElemDesc *description;
37
41
  VALUE list;
38
42
  int i;
39
43
 
@@ -41,9 +45,9 @@ static VALUE deprecated_attributes(VALUE self)
41
45
 
42
46
  list = rb_ary_new();
43
47
 
44
- if(NULL == description->attrs_depr) return list;
48
+ if (NULL == description->attrs_depr) { return list; }
45
49
 
46
- for(i = 0; description->attrs_depr[i]; i++) {
50
+ for (i = 0; description->attrs_depr[i]; i++) {
47
51
  rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
48
52
  }
49
53
 
@@ -56,9 +60,10 @@ static VALUE deprecated_attributes(VALUE self)
56
60
  *
57
61
  * A list of optional attributes for this element
58
62
  */
59
- static VALUE optional_attributes(VALUE self)
63
+ static VALUE
64
+ optional_attributes(VALUE self)
60
65
  {
61
- const htmlElemDesc * description;
66
+ const htmlElemDesc *description;
62
67
  VALUE list;
63
68
  int i;
64
69
 
@@ -66,9 +71,9 @@ static VALUE optional_attributes(VALUE self)
66
71
 
67
72
  list = rb_ary_new();
68
73
 
69
- if(NULL == description->attrs_opt) return list;
74
+ if (NULL == description->attrs_opt) { return list; }
70
75
 
71
- for(i = 0; description->attrs_opt[i]; i++) {
76
+ for (i = 0; description->attrs_opt[i]; i++) {
72
77
  rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
73
78
  }
74
79
 
@@ -81,13 +86,15 @@ static VALUE optional_attributes(VALUE self)
81
86
  *
82
87
  * The default sub element for this element
83
88
  */
84
- static VALUE default_sub_element(VALUE self)
89
+ static VALUE
90
+ default_sub_element(VALUE self)
85
91
  {
86
- const htmlElemDesc * description;
92
+ const htmlElemDesc *description;
87
93
  Data_Get_Struct(self, htmlElemDesc, description);
88
94
 
89
- if (description->defaultsubelt)
95
+ if (description->defaultsubelt) {
90
96
  return NOKOGIRI_STR_NEW2(description->defaultsubelt);
97
+ }
91
98
 
92
99
  return Qnil;
93
100
  }
@@ -98,9 +105,10 @@ static VALUE default_sub_element(VALUE self)
98
105
  *
99
106
  * A list of allowed sub elements for this element.
100
107
  */
101
- static VALUE sub_elements(VALUE self)
108
+ static VALUE
109
+ sub_elements(VALUE self)
102
110
  {
103
- const htmlElemDesc * description;
111
+ const htmlElemDesc *description;
104
112
  VALUE list;
105
113
  int i;
106
114
 
@@ -108,9 +116,9 @@ static VALUE sub_elements(VALUE self)
108
116
 
109
117
  list = rb_ary_new();
110
118
 
111
- if(NULL == description->subelts) return list;
119
+ if (NULL == description->subelts) { return list; }
112
120
 
113
- for(i = 0; description->subelts[i]; i++) {
121
+ for (i = 0; description->subelts[i]; i++) {
114
122
  rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
115
123
  }
116
124
 
@@ -123,9 +131,10 @@ static VALUE sub_elements(VALUE self)
123
131
  *
124
132
  * The description for this element
125
133
  */
126
- static VALUE description(VALUE self)
134
+ static VALUE
135
+ description(VALUE self)
127
136
  {
128
- const htmlElemDesc * description;
137
+ const htmlElemDesc *description;
129
138
  Data_Get_Struct(self, htmlElemDesc, description);
130
139
 
131
140
  return NOKOGIRI_STR_NEW2(description->desc);
@@ -137,12 +146,13 @@ static VALUE description(VALUE self)
137
146
  *
138
147
  * Is this element an inline element?
139
148
  */
140
- static VALUE inline_eh(VALUE self)
149
+ static VALUE
150
+ inline_eh(VALUE self)
141
151
  {
142
- const htmlElemDesc * description;
152
+ const htmlElemDesc *description;
143
153
  Data_Get_Struct(self, htmlElemDesc, description);
144
154
 
145
- if(description->isinline) return Qtrue;
155
+ if (description->isinline) { return Qtrue; }
146
156
  return Qfalse;
147
157
  }
148
158
 
@@ -152,12 +162,13 @@ static VALUE inline_eh(VALUE self)
152
162
  *
153
163
  * Is this element deprecated?
154
164
  */
155
- static VALUE deprecated_eh(VALUE self)
165
+ static VALUE
166
+ deprecated_eh(VALUE self)
156
167
  {
157
- const htmlElemDesc * description;
168
+ const htmlElemDesc *description;
158
169
  Data_Get_Struct(self, htmlElemDesc, description);
159
170
 
160
- if(description->depr) return Qtrue;
171
+ if (description->depr) { return Qtrue; }
161
172
  return Qfalse;
162
173
  }
163
174
 
@@ -167,12 +178,13 @@ static VALUE deprecated_eh(VALUE self)
167
178
  *
168
179
  * Is this an empty element?
169
180
  */
170
- static VALUE empty_eh(VALUE self)
181
+ static VALUE
182
+ empty_eh(VALUE self)
171
183
  {
172
- const htmlElemDesc * description;
184
+ const htmlElemDesc *description;
173
185
  Data_Get_Struct(self, htmlElemDesc, description);
174
186
 
175
- if(description->empty) return Qtrue;
187
+ if (description->empty) { return Qtrue; }
176
188
  return Qfalse;
177
189
  }
178
190
 
@@ -182,12 +194,13 @@ static VALUE empty_eh(VALUE self)
182
194
  *
183
195
  * Should the end tag be saved?
184
196
  */
185
- static VALUE save_end_tag_eh(VALUE self)
197
+ static VALUE
198
+ save_end_tag_eh(VALUE self)
186
199
  {
187
- const htmlElemDesc * description;
200
+ const htmlElemDesc *description;
188
201
  Data_Get_Struct(self, htmlElemDesc, description);
189
202
 
190
- if(description->saveEndTag) return Qtrue;
203
+ if (description->saveEndTag) { return Qtrue; }
191
204
  return Qfalse;
192
205
  }
193
206
 
@@ -197,12 +210,13 @@ static VALUE save_end_tag_eh(VALUE self)
197
210
  *
198
211
  * Can the end tag be implied for this tag?
199
212
  */
200
- static VALUE implied_end_tag_eh(VALUE self)
213
+ static VALUE
214
+ implied_end_tag_eh(VALUE self)
201
215
  {
202
- const htmlElemDesc * description;
216
+ const htmlElemDesc *description;
203
217
  Data_Get_Struct(self, htmlElemDesc, description);
204
218
 
205
- if(description->endTag) return Qtrue;
219
+ if (description->endTag) { return Qtrue; }
206
220
  return Qfalse;
207
221
  }
208
222
 
@@ -212,12 +226,13 @@ static VALUE implied_end_tag_eh(VALUE self)
212
226
  *
213
227
  * Can the start tag be implied for this tag?
214
228
  */
215
- static VALUE implied_start_tag_eh(VALUE self)
229
+ static VALUE
230
+ implied_start_tag_eh(VALUE self)
216
231
  {
217
- const htmlElemDesc * description;
232
+ const htmlElemDesc *description;
218
233
  Data_Get_Struct(self, htmlElemDesc, description);
219
234
 
220
- if(description->startTag) return Qtrue;
235
+ if (description->startTag) { return Qtrue; }
221
236
  return Qfalse;
222
237
  }
223
238
 
@@ -227,12 +242,13 @@ static VALUE implied_start_tag_eh(VALUE self)
227
242
  *
228
243
  * Get the tag name for this ElementDescription
229
244
  */
230
- static VALUE name(VALUE self)
245
+ static VALUE
246
+ name(VALUE self)
231
247
  {
232
- const htmlElemDesc * description;
248
+ const htmlElemDesc *description;
233
249
  Data_Get_Struct(self, htmlElemDesc, description);
234
250
 
235
- if(NULL == description->name) return Qnil;
251
+ if (NULL == description->name) { return Qnil; }
236
252
  return NOKOGIRI_STR_NEW2(description->name);
237
253
  }
238
254
 
@@ -242,38 +258,35 @@ static VALUE name(VALUE self)
242
258
  *
243
259
  * Get ElementDescription for +tag_name+
244
260
  */
245
- static VALUE get_description(VALUE klass, VALUE tag_name)
261
+ static VALUE
262
+ get_description(VALUE klass, VALUE tag_name)
246
263
  {
247
- const htmlElemDesc * description = htmlTagLookup(
248
- (const xmlChar *)StringValueCStr(tag_name)
249
- );
264
+ const htmlElemDesc *description = htmlTagLookup(
265
+ (const xmlChar *)StringValueCStr(tag_name)
266
+ );
250
267
 
251
- if(NULL == description) return Qnil;
268
+ if (NULL == description) { return Qnil; }
252
269
  return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
253
270
  }
254
271
 
255
- VALUE cNokogiriHtmlElementDescription ;
256
- void init_html_element_description()
272
+ void
273
+ noko_init_html_element_description()
257
274
  {
258
- VALUE nokogiri = rb_define_module("Nokogiri");
259
- VALUE html = rb_define_module_under(nokogiri, "HTML");
260
- VALUE klass = rb_define_class_under(html, "ElementDescription",rb_cObject);
261
-
262
- cNokogiriHtmlElementDescription = klass;
263
-
264
- rb_define_singleton_method(klass, "[]", get_description, 1);
265
-
266
- rb_define_method(klass, "name", name, 0);
267
- rb_define_method(klass, "implied_start_tag?", implied_start_tag_eh, 0);
268
- rb_define_method(klass, "implied_end_tag?", implied_end_tag_eh, 0);
269
- rb_define_method(klass, "save_end_tag?", save_end_tag_eh, 0);
270
- rb_define_method(klass, "empty?", empty_eh, 0);
271
- rb_define_method(klass, "deprecated?", deprecated_eh, 0);
272
- rb_define_method(klass, "inline?", inline_eh, 0);
273
- rb_define_method(klass, "description", description, 0);
274
- rb_define_method(klass, "sub_elements", sub_elements, 0);
275
- rb_define_method(klass, "default_sub_element", default_sub_element, 0);
276
- rb_define_method(klass, "optional_attributes", optional_attributes, 0);
277
- rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0);
278
- rb_define_method(klass, "required_attributes", required_attributes, 0);
275
+ cNokogiriHtmlElementDescription = rb_define_class_under(mNokogiriHtml, "ElementDescription", rb_cObject);
276
+
277
+ rb_define_singleton_method(cNokogiriHtmlElementDescription, "[]", get_description, 1);
278
+
279
+ rb_define_method(cNokogiriHtmlElementDescription, "name", name, 0);
280
+ rb_define_method(cNokogiriHtmlElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
281
+ rb_define_method(cNokogiriHtmlElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
282
+ rb_define_method(cNokogiriHtmlElementDescription, "save_end_tag?", save_end_tag_eh, 0);
283
+ rb_define_method(cNokogiriHtmlElementDescription, "empty?", empty_eh, 0);
284
+ rb_define_method(cNokogiriHtmlElementDescription, "deprecated?", deprecated_eh, 0);
285
+ rb_define_method(cNokogiriHtmlElementDescription, "inline?", inline_eh, 0);
286
+ rb_define_method(cNokogiriHtmlElementDescription, "description", description, 0);
287
+ rb_define_method(cNokogiriHtmlElementDescription, "sub_elements", sub_elements, 0);
288
+ rb_define_method(cNokogiriHtmlElementDescription, "default_sub_element", default_sub_element, 0);
289
+ rb_define_method(cNokogiriHtmlElementDescription, "optional_attributes", optional_attributes, 0);
290
+ rb_define_method(cNokogiriHtmlElementDescription, "deprecated_attributes", deprecated_attributes, 0);
291
+ rb_define_method(cNokogiriHtmlElementDescription, "required_attributes", required_attributes, 0);
279
292
  }
@@ -1,4 +1,6 @@
1
- #include <html_entity_lookup.h>
1
+ #include <nokogiri.h>
2
+
3
+ static VALUE cNokogiriHtmlEntityLookup;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,27 +8,30 @@
6
8
  *
7
9
  * Get the HTML::EntityDescription for +key+
8
10
  */
9
- static VALUE get(VALUE self, VALUE key)
11
+ static VALUE
12
+ get(VALUE _, VALUE rb_entity_name)
10
13
  {
11
- const htmlEntityDesc * desc =
12
- htmlEntityLookup((const xmlChar *)StringValueCStr(key));
13
- VALUE klass, args[3];
14
+ VALUE cNokogiriHtmlEntityDescription;
15
+ const htmlEntityDesc *c_entity_desc;
16
+ VALUE rb_constructor_args[3];
14
17
 
15
- if(NULL == desc) return Qnil;
16
- klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
18
+ c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
19
+ if (NULL == c_entity_desc) {
20
+ return Qnil;
21
+ }
17
22
 
18
- args[0] = INT2NUM((long)desc->value);
19
- args[1] = NOKOGIRI_STR_NEW2(desc->name);
20
- args[2] = NOKOGIRI_STR_NEW2(desc->desc);
23
+ rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
24
+ rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
+ rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
21
26
 
22
- return rb_class_new_instance(3, args, klass);
27
+ cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
23
29
  }
24
30
 
25
- void init_html_entity_lookup()
31
+ void
32
+ noko_init_html_entity_lookup()
26
33
  {
27
- VALUE nokogiri = rb_define_module("Nokogiri");
28
- VALUE html = rb_define_module_under(nokogiri, "HTML");
29
- VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
34
+ cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
30
35
 
31
- rb_define_method(klass, "get", get, 1);
36
+ rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
32
37
  }
@@ -1,8 +1,9 @@
1
- #include <html_sax_parser_context.h>
1
+ #include <nokogiri.h>
2
2
 
3
3
  VALUE cNokogiriHtmlSaxParserContext ;
4
4
 
5
- static void deallocate(xmlParserCtxtPtr ctxt)
5
+ static void
6
+ deallocate(xmlParserCtxtPtr ctxt)
6
7
  {
7
8
  NOKOGIRI_DEBUG_START(ctxt);
8
9
 
@@ -16,103 +17,103 @@ static void deallocate(xmlParserCtxtPtr ctxt)
16
17
  static VALUE
17
18
  parse_memory(VALUE klass, VALUE data, VALUE encoding)
18
19
  {
19
- htmlParserCtxtPtr ctxt;
20
-
21
- if (NIL_P(data))
22
- rb_raise(rb_eArgError, "data cannot be nil");
23
- if (!(int)RSTRING_LEN(data))
24
- rb_raise(rb_eRuntimeError, "data cannot be empty");
25
-
26
- ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
- (int)RSTRING_LEN(data));
28
- if (ctxt->sax) {
29
- xmlFree(ctxt->sax);
30
- ctxt->sax = NULL;
20
+ htmlParserCtxtPtr ctxt;
21
+
22
+ if (NIL_P(data)) {
23
+ rb_raise(rb_eArgError, "data cannot be nil");
24
+ }
25
+ if (!(int)RSTRING_LEN(data)) {
26
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
27
+ }
28
+
29
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
30
+ (int)RSTRING_LEN(data));
31
+ if (ctxt->sax) {
32
+ xmlFree(ctxt->sax);
33
+ ctxt->sax = NULL;
34
+ }
35
+
36
+ if (RTEST(encoding)) {
37
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
38
+ if (enc != NULL) {
39
+ xmlSwitchToEncoding(ctxt, enc);
40
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
41
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
42
+ StringValueCStr(encoding));
43
+ }
31
44
  }
45
+ }
32
46
 
33
- if (RTEST(encoding)) {
34
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
35
- if (enc != NULL) {
36
- xmlSwitchToEncoding(ctxt, enc);
37
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
- StringValueCStr(encoding));
40
- }
41
- }
42
- }
43
-
44
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
47
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
45
48
  }
46
49
 
47
- static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
50
+ static VALUE
51
+ parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
52
  {
49
53
  htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
50
- StringValueCStr(filename),
51
- StringValueCStr(encoding)
52
- );
54
+ StringValueCStr(filename),
55
+ StringValueCStr(encoding)
56
+ );
53
57
  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
54
58
  }
55
59
 
56
60
  static VALUE
57
61
  parse_doc(VALUE ctxt_val)
58
62
  {
59
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
- htmlParseDocument(ctxt);
61
- return Qnil;
63
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
64
+ htmlParseDocument(ctxt);
65
+ return Qnil;
62
66
  }
63
67
 
64
68
  static VALUE
65
69
  parse_doc_finalize(VALUE ctxt_val)
66
70
  {
67
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
71
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
72
 
69
- if (ctxt->myDoc)
70
- xmlFreeDoc(ctxt->myDoc);
73
+ if (ctxt->myDoc) {
74
+ xmlFreeDoc(ctxt->myDoc);
75
+ }
71
76
 
72
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
- return Qnil;
77
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
78
+ return Qnil;
74
79
  }
75
80
 
76
81
  static VALUE
77
82
  parse_with(VALUE self, VALUE sax_handler)
78
83
  {
79
- htmlParserCtxtPtr ctxt;
80
- htmlSAXHandlerPtr sax;
84
+ htmlParserCtxtPtr ctxt;
85
+ htmlSAXHandlerPtr sax;
81
86
 
82
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
87
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
88
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
89
+ }
84
90
 
85
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
91
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
92
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
87
93
 
88
- /* Free the sax handler since we'll assign our own */
89
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
- xmlFree(ctxt->sax);
94
+ /* Free the sax handler since we'll assign our own */
95
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
96
+ xmlFree(ctxt->sax);
97
+ }
91
98
 
92
- ctxt->sax = sax;
93
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
99
+ ctxt->sax = sax;
100
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
94
101
 
95
- xmlSetStructuredErrorFunc(NULL, NULL);
102
+ xmlSetStructuredErrorFunc(NULL, NULL);
96
103
 
97
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
104
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
98
105
 
99
- return self;
106
+ return self;
100
107
  }
101
108
 
102
- void init_html_sax_parser_context()
109
+ void
110
+ noko_init_html_sax_parser_context()
103
111
  {
104
- VALUE nokogiri = rb_define_module("Nokogiri");
105
- VALUE xml = rb_define_module_under(nokogiri, "XML");
106
- VALUE html = rb_define_module_under(nokogiri, "HTML");
107
- VALUE sax = rb_define_module_under(xml, "SAX");
108
- VALUE hsax = rb_define_module_under(html, "SAX");
109
- VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
110
- VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
111
-
112
- cNokogiriHtmlSaxParserContext = klass;
112
+ assert(cNokogiriXmlSaxParserContext);
113
+ cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
113
114
 
114
- rb_define_singleton_method(klass, "memory", parse_memory, 2);
115
- rb_define_singleton_method(klass, "file", parse_file, 2);
115
+ rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
116
+ rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
116
117
 
117
- rb_define_method(klass, "parse_with", parse_with, 1);
118
+ rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
118
119
  }