nokogiri 1.11.0.rc3-java → 1.11.4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (187) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/dependencies.yml +12 -12
  7. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  8. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  9. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  12. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  13. data/ext/java/nokogiri/NokogiriService.java +595 -556
  14. data/ext/java/nokogiri/XmlAttr.java +118 -126
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  16. data/ext/java/nokogiri/XmlCdata.java +35 -58
  17. data/ext/java/nokogiri/XmlComment.java +46 -67
  18. data/ext/java/nokogiri/XmlDocument.java +645 -572
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  20. data/ext/java/nokogiri/XmlDtd.java +448 -414
  21. data/ext/java/nokogiri/XmlElement.java +23 -48
  22. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  23. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  24. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  25. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  26. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  27. data/ext/java/nokogiri/XmlNode.java +1843 -1622
  28. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  30. data/ext/java/nokogiri/XmlReader.java +513 -450
  31. data/ext/java/nokogiri/XmlRelaxng.java +89 -101
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  34. data/ext/java/nokogiri/XmlSchema.java +335 -242
  35. data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
  36. data/ext/java/nokogiri/XmlText.java +55 -76
  37. data/ext/java/nokogiri/XmlXpathContext.java +242 -210
  38. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  57. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  58. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  94. data/ext/nokogiri/depend +37 -358
  95. data/ext/nokogiri/extconf.rb +581 -374
  96. data/ext/nokogiri/html_document.c +78 -82
  97. data/ext/nokogiri/html_element_description.c +84 -71
  98. data/ext/nokogiri/html_entity_lookup.c +21 -16
  99. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  100. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  101. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  102. data/ext/nokogiri/nokogiri.c +192 -93
  103. data/ext/nokogiri/test_global_handlers.c +40 -0
  104. data/ext/nokogiri/xml_attr.c +15 -15
  105. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  106. data/ext/nokogiri/xml_cdata.c +13 -18
  107. data/ext/nokogiri/xml_comment.c +19 -26
  108. data/ext/nokogiri/xml_document.c +246 -188
  109. data/ext/nokogiri/xml_document_fragment.c +13 -15
  110. data/ext/nokogiri/xml_dtd.c +54 -48
  111. data/ext/nokogiri/xml_element_content.c +30 -27
  112. data/ext/nokogiri/xml_element_decl.c +22 -22
  113. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  114. data/ext/nokogiri/xml_entity_decl.c +32 -30
  115. data/ext/nokogiri/xml_entity_reference.c +16 -18
  116. data/ext/nokogiri/xml_namespace.c +56 -49
  117. data/ext/nokogiri/xml_node.c +371 -320
  118. data/ext/nokogiri/xml_node_set.c +168 -156
  119. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  120. data/ext/nokogiri/xml_reader.c +191 -157
  121. data/ext/nokogiri/xml_relax_ng.c +52 -28
  122. data/ext/nokogiri/xml_sax_parser.c +118 -118
  123. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  124. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  125. data/ext/nokogiri/xml_schema.c +95 -47
  126. data/ext/nokogiri/xml_syntax_error.c +42 -21
  127. data/ext/nokogiri/xml_text.c +13 -17
  128. data/ext/nokogiri/xml_xpath_context.c +206 -123
  129. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  130. data/lib/nokogiri.rb +3 -7
  131. data/lib/nokogiri/css/parser.rb +3 -3
  132. data/lib/nokogiri/css/parser.y +2 -2
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -149
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/document.rb +91 -35
  142. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  143. data/lib/nokogiri/xml/node.rb +89 -69
  144. data/lib/nokogiri/xml/parse_options.rb +6 -0
  145. data/lib/nokogiri/xml/reader.rb +2 -9
  146. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  147. data/lib/nokogiri/xml/schema.rb +12 -4
  148. data/lib/nokogiri/xml/searchable.rb +3 -1
  149. data/lib/nokogiri/xml/xpath.rb +1 -3
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  151. metadata +86 -177
  152. data/ext/nokogiri/html_document.h +0 -10
  153. data/ext/nokogiri/html_element_description.h +0 -10
  154. data/ext/nokogiri/html_entity_lookup.h +0 -8
  155. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  156. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  157. data/ext/nokogiri/nokogiri.h +0 -134
  158. data/ext/nokogiri/xml_attr.h +0 -9
  159. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  160. data/ext/nokogiri/xml_cdata.h +0 -9
  161. data/ext/nokogiri/xml_comment.h +0 -9
  162. data/ext/nokogiri/xml_document.h +0 -23
  163. data/ext/nokogiri/xml_document_fragment.h +0 -10
  164. data/ext/nokogiri/xml_dtd.h +0 -10
  165. data/ext/nokogiri/xml_element_content.h +0 -10
  166. data/ext/nokogiri/xml_element_decl.h +0 -9
  167. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  168. data/ext/nokogiri/xml_entity_decl.h +0 -10
  169. data/ext/nokogiri/xml_entity_reference.h +0 -9
  170. data/ext/nokogiri/xml_io.c +0 -63
  171. data/ext/nokogiri/xml_io.h +0 -11
  172. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  173. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  174. data/ext/nokogiri/xml_namespace.h +0 -14
  175. data/ext/nokogiri/xml_node.h +0 -13
  176. data/ext/nokogiri/xml_node_set.h +0 -12
  177. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  178. data/ext/nokogiri/xml_reader.h +0 -10
  179. data/ext/nokogiri/xml_relax_ng.h +0 -9
  180. data/ext/nokogiri/xml_sax_parser.h +0 -39
  181. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  182. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_schema.h +0 -9
  184. data/ext/nokogiri/xml_syntax_error.h +0 -13
  185. data/ext/nokogiri/xml_text.h +0 -9
  186. data/ext/nokogiri/xml_xpath_context.h +0 -10
  187. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,4 +1,6 @@
1
- #include <html_entity_lookup.h>
1
+ #include <nokogiri.h>
2
+
3
+ static VALUE cNokogiriHtmlEntityLookup;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,27 +8,30 @@
6
8
  *
7
9
  * Get the HTML::EntityDescription for +key+
8
10
  */
9
- static VALUE get(VALUE self, VALUE key)
11
+ static VALUE
12
+ get(VALUE _, VALUE rb_entity_name)
10
13
  {
11
- const htmlEntityDesc * desc =
12
- htmlEntityLookup((const xmlChar *)StringValueCStr(key));
13
- VALUE klass, args[3];
14
+ VALUE cNokogiriHtmlEntityDescription;
15
+ const htmlEntityDesc *c_entity_desc;
16
+ VALUE rb_constructor_args[3];
14
17
 
15
- if(NULL == desc) return Qnil;
16
- klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
18
+ c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
19
+ if (NULL == c_entity_desc) {
20
+ return Qnil;
21
+ }
17
22
 
18
- args[0] = INT2NUM((long)desc->value);
19
- args[1] = NOKOGIRI_STR_NEW2(desc->name);
20
- args[2] = NOKOGIRI_STR_NEW2(desc->desc);
23
+ rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
24
+ rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
+ rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
21
26
 
22
- return rb_class_new_instance(3, args, klass);
27
+ cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
23
29
  }
24
30
 
25
- void init_html_entity_lookup()
31
+ void
32
+ noko_init_html_entity_lookup()
26
33
  {
27
- VALUE nokogiri = rb_define_module("Nokogiri");
28
- VALUE html = rb_define_module_under(nokogiri, "HTML");
29
- VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
34
+ cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
30
35
 
31
- rb_define_method(klass, "get", get, 1);
36
+ rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
32
37
  }
@@ -1,116 +1,119 @@
1
- #include <html_sax_parser_context.h>
1
+ #include <nokogiri.h>
2
2
 
3
3
  VALUE cNokogiriHtmlSaxParserContext ;
4
4
 
5
- static void deallocate(xmlParserCtxtPtr ctxt)
5
+ static void
6
+ deallocate(xmlParserCtxtPtr ctxt)
6
7
  {
7
- NOKOGIRI_DEBUG_START(handler);
8
+ NOKOGIRI_DEBUG_START(ctxt);
8
9
 
9
10
  ctxt->sax = NULL;
10
11
 
11
12
  htmlFreeParserCtxt(ctxt);
12
13
 
13
- NOKOGIRI_DEBUG_END(handler);
14
+ NOKOGIRI_DEBUG_END(ctxt);
14
15
  }
15
16
 
16
17
  static VALUE
17
18
  parse_memory(VALUE klass, VALUE data, VALUE encoding)
18
19
  {
19
- htmlParserCtxtPtr ctxt;
20
-
21
- if (NIL_P(data))
22
- rb_raise(rb_eArgError, "data cannot be nil");
23
- if (!(int)RSTRING_LEN(data))
24
- rb_raise(rb_eRuntimeError, "data cannot be empty");
25
-
26
- ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
- (int)RSTRING_LEN(data));
28
- if (ctxt->sax) {
29
- xmlFree(ctxt->sax);
30
- ctxt->sax = NULL;
20
+ htmlParserCtxtPtr ctxt;
21
+
22
+ if (NIL_P(data)) {
23
+ rb_raise(rb_eArgError, "data cannot be nil");
24
+ }
25
+ if (!(int)RSTRING_LEN(data)) {
26
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
27
+ }
28
+
29
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
30
+ (int)RSTRING_LEN(data));
31
+ if (ctxt->sax) {
32
+ xmlFree(ctxt->sax);
33
+ ctxt->sax = NULL;
34
+ }
35
+
36
+ if (RTEST(encoding)) {
37
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
38
+ if (enc != NULL) {
39
+ xmlSwitchToEncoding(ctxt, enc);
40
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
41
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
42
+ StringValueCStr(encoding));
43
+ }
31
44
  }
45
+ }
32
46
 
33
- if (RTEST(encoding)) {
34
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
35
- if (enc != NULL) {
36
- xmlSwitchToEncoding(ctxt, enc);
37
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
- StringValueCStr(encoding));
40
- }
41
- }
42
- }
43
-
44
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
47
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
45
48
  }
46
49
 
47
- static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
50
+ static VALUE
51
+ parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
52
  {
49
53
  htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
50
- StringValueCStr(filename),
51
- StringValueCStr(encoding)
52
- );
54
+ StringValueCStr(filename),
55
+ StringValueCStr(encoding)
56
+ );
53
57
  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
54
58
  }
55
59
 
56
60
  static VALUE
57
61
  parse_doc(VALUE ctxt_val)
58
62
  {
59
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
- htmlParseDocument(ctxt);
61
- return Qnil;
63
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
64
+ htmlParseDocument(ctxt);
65
+ return Qnil;
62
66
  }
63
67
 
64
68
  static VALUE
65
69
  parse_doc_finalize(VALUE ctxt_val)
66
70
  {
67
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
71
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
72
 
69
- if (ctxt->myDoc)
70
- xmlFreeDoc(ctxt->myDoc);
73
+ if (ctxt->myDoc) {
74
+ xmlFreeDoc(ctxt->myDoc);
75
+ }
71
76
 
72
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
- return Qnil;
77
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
78
+ return Qnil;
74
79
  }
75
80
 
76
81
  static VALUE
77
82
  parse_with(VALUE self, VALUE sax_handler)
78
83
  {
79
- htmlParserCtxtPtr ctxt;
80
- htmlSAXHandlerPtr sax;
84
+ htmlParserCtxtPtr ctxt;
85
+ htmlSAXHandlerPtr sax;
86
+
87
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
88
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
89
+ }
81
90
 
82
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
91
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
92
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
84
93
 
85
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
94
+ /* Free the sax handler since we'll assign our own */
95
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
96
+ xmlFree(ctxt->sax);
97
+ }
87
98
 
88
- /* Free the sax handler since we'll assign our own */
89
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
- xmlFree(ctxt->sax);
99
+ ctxt->sax = sax;
100
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
91
101
 
92
- ctxt->sax = sax;
93
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
102
+ xmlSetStructuredErrorFunc(NULL, NULL);
94
103
 
95
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
104
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
96
105
 
97
- return self;
106
+ return self;
98
107
  }
99
108
 
100
- void init_html_sax_parser_context()
109
+ void
110
+ noko_init_html_sax_parser_context()
101
111
  {
102
- VALUE nokogiri = rb_define_module("Nokogiri");
103
- VALUE xml = rb_define_module_under(nokogiri, "XML");
104
- VALUE html = rb_define_module_under(nokogiri, "HTML");
105
- VALUE sax = rb_define_module_under(xml, "SAX");
106
- VALUE hsax = rb_define_module_under(html, "SAX");
107
- VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
108
- VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
109
-
110
- cNokogiriHtmlSaxParserContext = klass;
112
+ assert(cNokogiriXmlSaxParserContext);
113
+ cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
111
114
 
112
- rb_define_singleton_method(klass, "memory", parse_memory, 2);
113
- rb_define_singleton_method(klass, "file", parse_file, 2);
115
+ rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
116
+ rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
114
117
 
115
- rb_define_method(klass, "parse_with", parse_with, 1);
118
+ rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
116
119
  }
@@ -1,4 +1,6 @@
1
- #include <html_sax_push_parser.h>
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtmlSaxPushParser;
2
4
 
3
5
  /*
4
6
  * call-seq:
@@ -6,25 +8,32 @@
6
8
  *
7
9
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
8
10
  */
9
- static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
11
+ static VALUE
12
+ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
13
  {
11
14
  xmlParserCtxtPtr ctx;
12
- const char * chunk = NULL;
13
- int size = 0;
14
-
15
+ const char *chunk = NULL;
16
+ int size = 0;
17
+ int status = 0;
18
+ libxmlStructuredErrorHandlerState handler_state;
15
19
 
16
20
  Data_Get_Struct(self, xmlParserCtxt, ctx);
17
21
 
18
- if(Qnil != _chunk) {
22
+ if (Qnil != _chunk) {
19
23
  chunk = StringValuePtr(_chunk);
20
24
  size = (int)RSTRING_LEN(_chunk);
21
25
  }
22
26
 
23
- if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
- if (!(ctx->options & XML_PARSE_RECOVER)) {
25
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
- Nokogiri_error_raise(NULL, e);
27
- }
27
+ Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
28
+
29
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
30
+
31
+ Nokogiri_structured_error_func_restore(&handler_state);
32
+
33
+ if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
+ // TODO: there appear to be no tests for this block
35
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
36
+ Nokogiri_error_raise(NULL, e);
28
37
  }
29
38
 
30
39
  return self;
@@ -36,34 +45,37 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
36
45
  *
37
46
  * Initialize the push parser with +xml_sax+ using +filename+
38
47
  */
39
- static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
- VALUE encoding)
48
+ static VALUE
49
+ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
50
+ VALUE encoding)
41
51
  {
42
52
  htmlSAXHandlerPtr sax;
43
- const char * filename = NULL;
53
+ const char *filename = NULL;
44
54
  htmlParserCtxtPtr ctx;
45
55
  xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
56
 
47
57
  Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
58
 
49
- if(_filename != Qnil) filename = StringValueCStr(_filename);
59
+ if (_filename != Qnil) { filename = StringValueCStr(_filename); }
50
60
 
51
61
  if (!NIL_P(encoding)) {
52
62
  enc = xmlParseCharEncoding(StringValueCStr(encoding));
53
- if (enc == XML_CHAR_ENCODING_ERROR)
63
+ if (enc == XML_CHAR_ENCODING_ERROR) {
54
64
  rb_raise(rb_eArgError, "Unsupported Encoding");
65
+ }
55
66
  }
56
67
 
57
68
  ctx = htmlCreatePushParserCtxt(
58
- sax,
59
- NULL,
60
- NULL,
61
- 0,
62
- filename,
63
- enc
64
- );
65
- if(ctx == NULL)
69
+ sax,
70
+ NULL,
71
+ NULL,
72
+ 0,
73
+ filename,
74
+ enc
75
+ );
76
+ if (ctx == NULL) {
66
77
  rb_raise(rb_eRuntimeError, "Could not create a parser context");
78
+ }
67
79
 
68
80
  ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
81
 
@@ -72,16 +84,12 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
72
84
  return self;
73
85
  }
74
86
 
75
- VALUE cNokogiriHtmlSaxPushParser;
76
- void init_html_sax_push_parser()
87
+ void
88
+ noko_init_html_sax_push_parser()
77
89
  {
78
- VALUE nokogiri = rb_define_module("Nokogiri");
79
- VALUE html = rb_define_module_under(nokogiri, "HTML");
80
- VALUE sax = rb_define_module_under(html, "SAX");
81
- VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
-
83
- cNokogiriHtmlSaxPushParser = klass;
90
+ assert(cNokogiriXmlSaxPushParser);
91
+ cNokogiriHtmlSaxPushParser = rb_define_class_under(mNokogiriHtmlSax, "PushParser", cNokogiriXmlSaxPushParser);
84
92
 
85
- rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
- rb_define_private_method(klass, "native_write", native_write, 2);
93
+ rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native", initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write", native_write, 2);
87
95
  }
@@ -0,0 +1,121 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <nokogiri.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is an Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent)
16
+ {
17
+ xmlNodePtr cur = NULL;
18
+
19
+ if (parent == NULL) {
20
+ return (NULL);
21
+ }
22
+ switch (parent->type) {
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
+ }
32
+ while (cur != NULL) {
33
+ if (cur->type == XML_ELEMENT_NODE) {
34
+ return (cur);
35
+ }
36
+ cur = cur->next;
37
+ }
38
+ return (NULL);
39
+ }
40
+
41
+ /**
42
+ * xmlNextElementSibling:
43
+ * @node: the current node
44
+ *
45
+ * Finds the closest following sibling of the node which is an
46
+ * element node.
47
+ * Note the handling of entities references is different than in
48
+ * the W3C DOM element traversal spec since we don't have back reference
49
+ * from entities content to entities references.
50
+ *
51
+ * Returns the next element sibling or NULL if not available
52
+ */
53
+ xmlNodePtr
54
+ xmlNextElementSibling(xmlNodePtr node)
55
+ {
56
+ if (node == NULL) {
57
+ return (NULL);
58
+ }
59
+ switch (node->type) {
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
+ }
75
+ while (node != NULL) {
76
+ if (node->type == XML_ELEMENT_NODE) {
77
+ return (node);
78
+ }
79
+ node = node->next;
80
+ }
81
+ return (NULL);
82
+ }
83
+
84
+ /**
85
+ * xmlLastElementChild:
86
+ * @parent: the parent node
87
+ *
88
+ * Finds the last child node of that element which is an Element node
89
+ * Note the handling of entities references is different than in
90
+ * the W3C DOM element traversal spec since we don't have back reference
91
+ * from entities content to entities references.
92
+ *
93
+ * Returns the last element child or NULL if not available
94
+ */
95
+ xmlNodePtr
96
+ xmlLastElementChild(xmlNodePtr parent)
97
+ {
98
+ xmlNodePtr cur = NULL;
99
+
100
+ if (parent == NULL) {
101
+ return (NULL);
102
+ }
103
+ switch (parent->type) {
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
+ }
113
+ while (cur != NULL) {
114
+ if (cur->type == XML_ELEMENT_NODE) {
115
+ return (cur);
116
+ }
117
+ cur = cur->prev;
118
+ }
119
+ return (NULL);
120
+ }
121
+ #endif