nokogiri 1.5.0-java → 1.5.1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (81) hide show
  1. data/CHANGELOG.ja.rdoc +56 -12
  2. data/CHANGELOG.rdoc +45 -0
  3. data/C_CODING_STYLE.rdoc +27 -0
  4. data/Manifest.txt +4 -0
  5. data/README.rdoc +11 -7
  6. data/Rakefile +44 -26
  7. data/bin/nokogiri +10 -2
  8. data/ext/java/nokogiri/HtmlDocument.java +37 -2
  9. data/ext/java/nokogiri/NokogiriService.java +10 -2
  10. data/ext/java/nokogiri/XmlAttr.java +1 -1
  11. data/ext/java/nokogiri/XmlDocument.java +68 -11
  12. data/ext/java/nokogiri/XmlDocumentFragment.java +16 -5
  13. data/ext/java/nokogiri/XmlElement.java +0 -40
  14. data/ext/java/nokogiri/XmlNamespace.java +8 -1
  15. data/ext/java/nokogiri/XmlNode.java +131 -27
  16. data/ext/java/nokogiri/XmlNodeSet.java +4 -1
  17. data/ext/java/nokogiri/XmlSaxParserContext.java +2 -13
  18. data/ext/java/nokogiri/XmlXpathContext.java +4 -1
  19. data/ext/java/nokogiri/XsltStylesheet.java +198 -37
  20. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +40 -2
  21. data/ext/java/nokogiri/internals/NokogiriHelpers.java +82 -9
  22. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +4 -3
  23. data/ext/java/nokogiri/internals/ParserContext.java +33 -3
  24. data/ext/java/nokogiri/internals/SaveContextVisitor.java +203 -12
  25. data/ext/java/nokogiri/internals/XmlDomParser.java +33 -2
  26. data/ext/java/nokogiri/internals/XmlDomParserContext.java +32 -12
  27. data/ext/nokogiri/extconf.rb +11 -3
  28. data/ext/nokogiri/html_document.c +16 -0
  29. data/ext/nokogiri/html_sax_parser_context.c +59 -37
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +6 -8
  33. data/ext/nokogiri/nokogiri.h +3 -0
  34. data/ext/nokogiri/xml_document.c +101 -3
  35. data/ext/nokogiri/xml_document.h +3 -3
  36. data/ext/nokogiri/xml_node.c +150 -58
  37. data/ext/nokogiri/xml_node_set.c +169 -120
  38. data/ext/nokogiri/xml_node_set.h +5 -0
  39. data/ext/nokogiri/xml_sax_parser_context.c +64 -41
  40. data/ext/nokogiri/xml_text.c +2 -0
  41. data/ext/nokogiri/xml_xpath_context.c +30 -24
  42. data/ext/nokogiri/xslt_stylesheet.c +62 -16
  43. data/ext/nokogiri/xslt_stylesheet.h +5 -0
  44. data/lib/nokogiri/css/parser.rb +163 -157
  45. data/lib/nokogiri/css/parser.y +6 -3
  46. data/lib/nokogiri/css/tokenizer.rb +1 -1
  47. data/lib/nokogiri/css/tokenizer.rex +1 -1
  48. data/lib/nokogiri/html.rb +1 -0
  49. data/lib/nokogiri/html/document.rb +82 -42
  50. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  51. data/lib/nokogiri/nokogiri.jar +0 -0
  52. data/lib/nokogiri/version.rb +1 -1
  53. data/lib/nokogiri/xml.rb +6 -0
  54. data/lib/nokogiri/xml/builder.rb +7 -1
  55. data/lib/nokogiri/xml/document.rb +32 -17
  56. data/lib/nokogiri/xml/document_fragment.rb +6 -1
  57. data/lib/nokogiri/xml/node.rb +40 -9
  58. data/lib/nokogiri/xslt.rb +5 -1
  59. data/tasks/cross_compile.rb +1 -0
  60. data/tasks/nokogiri.org.rb +6 -0
  61. data/tasks/test.rb +1 -0
  62. data/test/css/test_xpath_visitor.rb +6 -0
  63. data/test/helper.rb +1 -0
  64. data/test/html/test_document.rb +26 -0
  65. data/test/html/test_document_fragment.rb +1 -2
  66. data/test/test_memory_leak.rb +81 -1
  67. data/test/test_xslt_transforms.rb +152 -123
  68. data/test/xml/test_builder.rb +24 -2
  69. data/test/xml/test_c14n.rb +151 -0
  70. data/test/xml/test_document.rb +48 -0
  71. data/test/xml/test_namespace.rb +5 -0
  72. data/test/xml/test_node.rb +82 -1
  73. data/test/xml/test_node_attributes.rb +19 -0
  74. data/test/xml/test_node_inheritance.rb +32 -0
  75. data/test/xml/test_node_reparenting.rb +32 -0
  76. data/test/xml/test_node_set.rb +16 -8
  77. data/test/xml/test_reader_encoding.rb +16 -0
  78. data/test/xml/test_unparented_node.rb +32 -0
  79. data/test/xml/test_xinclude.rb +83 -0
  80. data/test/xml/test_xpath.rb +22 -0
  81. metadata +147 -123
@@ -35,8 +35,14 @@ package nokogiri.internals;
35
35
  import java.io.IOException;
36
36
 
37
37
  import nokogiri.XmlDocument;
38
+ import nokogiri.internals.ParserContext.Options;
38
39
 
39
40
  import org.apache.xerces.parsers.DOMParser;
41
+ import org.apache.xerces.parsers.XIncludeParserConfiguration;
42
+ import org.apache.xerces.xni.XMLResourceIdentifier;
43
+ import org.apache.xerces.xni.XNIException;
44
+ import org.apache.xerces.xni.parser.XMLEntityResolver;
45
+ import org.apache.xerces.xni.parser.XMLInputSource;
40
46
  import org.apache.xerces.xni.parser.XMLParserConfiguration;
41
47
  import org.cyberneko.dtd.DTDConfiguration;
42
48
  import org.w3c.dom.Document;
@@ -51,14 +57,21 @@ import org.xml.sax.SAXException;
51
57
  */
52
58
  public class XmlDomParser extends DOMParser {
53
59
  DOMParser dtd;
60
+ ParserContext.Options options;
54
61
 
55
- public XmlDomParser() {
62
+ public XmlDomParser(ParserContext.Options options) {
56
63
  super();
64
+ this.options = options;
57
65
 
58
66
  DTDConfiguration dtdConfig = new DTDConfiguration();
59
67
  dtd = new DOMParser(dtdConfig);
60
68
 
61
- XMLParserConfiguration config = getXMLParserConfiguration();
69
+ XMLParserConfiguration config;
70
+ if (options.xInclude) {
71
+ config = new XIncludeParserConfiguration();
72
+ } else {
73
+ config = getXMLParserConfiguration();
74
+ }
62
75
  config.setDTDHandler(dtdConfig);
63
76
  config.setDTDContentModelHandler(dtdConfig);
64
77
  }
@@ -66,6 +79,9 @@ public class XmlDomParser extends DOMParser {
66
79
  @Override
67
80
  public void parse(InputSource source) throws SAXException, IOException {
68
81
  dtd.reset();
82
+ if (options.xInclude) {
83
+ super.setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
84
+ }
69
85
  super.parse(source);
70
86
  Document doc = getDocument();
71
87
  if (doc == null)
@@ -73,4 +89,19 @@ public class XmlDomParser extends DOMParser {
73
89
 
74
90
  doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
75
91
  }
92
+
93
+ private class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
94
+ InputSource source;
95
+ private NokogiriXInlcudeEntityResolver(InputSource source) {
96
+ this.source = source;
97
+ }
98
+
99
+ @Override
100
+ public InputSource resolveEntity(String publicId, String systemId)
101
+ throws SAXException, IOException {
102
+ if (systemId != null) source.setSystemId(systemId);
103
+ if (publicId != null) source.setPublicId(publicId);
104
+ return source;
105
+ }
106
+ }
76
107
  }
@@ -34,7 +34,6 @@ package nokogiri.internals;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
37
-
38
37
  import java.io.ByteArrayInputStream;
39
38
  import java.io.IOException;
40
39
  import java.util.ArrayList;
@@ -73,6 +72,8 @@ public class XmlDomParserContext extends ParserContext {
73
72
  protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
74
73
  "http://apache.org/xml/features/dom/include-ignorable-whitespace";
75
74
  protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
75
+ private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
76
+ private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
76
77
 
77
78
  protected ParserContext.Options options;
78
79
  protected DOMParser parser;
@@ -102,9 +103,17 @@ public class XmlDomParserContext extends ParserContext {
102
103
  }
103
104
 
104
105
  protected void initParser(Ruby runtime) {
105
- parser = new XmlDomParser();
106
+ if (options.xInclude) {
107
+ System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
108
+ "org.apache.xerces.parsers.XIncludeParserConfiguration");
109
+ }
110
+
111
+ parser = new XmlDomParser(options);
106
112
  parser.setErrorHandler(errorHandler);
107
113
 
114
+ // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
115
+ setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
116
+
108
117
  if (options.noBlanks) {
109
118
  setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
110
119
  }
@@ -160,20 +169,30 @@ public class XmlDomParserContext extends ParserContext {
160
169
  doc.setInstanceVariable("@errors", errors);
161
170
  }
162
171
 
163
- public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, Exception ex) {
172
+ public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) {
164
173
  if (options.recover) {
165
- XmlDocument doc = this.getNewEmptyDocument(context);
166
- this.addErrorsIfNecessary(context, doc);
174
+ XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
175
+ this.addErrorsIfNecessary(context, xmlDocument);
167
176
  XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
168
177
  xmlSyntaxError.setException(ex);
169
- ((RubyArray) doc.getInstanceVariable("@errors")).append(xmlSyntaxError);
170
- return doc;
178
+ ((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
179
+ return xmlDocument;
171
180
  } else {
172
181
  XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
173
182
  xmlSyntaxError.setException(ex);
174
183
  throw new RaiseException(xmlSyntaxError);
175
184
  }
176
185
  }
186
+
187
+ private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klazz) {
188
+ Document document = parser.getDocument();
189
+ XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
190
+ if (document != null) {
191
+ xmlDocument.setDocumentNode(context, document);
192
+ }
193
+ xmlDocument.setEncoding(ruby_encoding);
194
+ return xmlDocument;
195
+ }
177
196
 
178
197
  protected XmlDocument getNewEmptyDocument(ThreadContext context) {
179
198
  IRubyObject[] args = new IRubyObject[0];
@@ -188,7 +207,7 @@ public class XmlDomParserContext extends ParserContext {
188
207
  RubyClass klazz,
189
208
  Document doc) {
190
209
  XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
191
- xmlDocument.setNode(context, doc);
210
+ xmlDocument.setDocumentNode(context, doc);
192
211
  xmlDocument.setEncoding(ruby_encoding);
193
212
 
194
213
  if (options.dtdLoad) {
@@ -202,18 +221,19 @@ public class XmlDomParserContext extends ParserContext {
202
221
  * Must call setInputSource() before this method.
203
222
  */
204
223
  public XmlDocument parse(ThreadContext context,
205
- IRubyObject klass,
224
+ IRubyObject klazz,
206
225
  IRubyObject url) {
226
+ XmlDocument xmlDoc;
207
227
  try {
208
228
  Document doc = do_parse();
209
- XmlDocument xmlDoc = wrapDocument(context, (RubyClass)klass, doc);
229
+ xmlDoc = wrapDocument(context, (RubyClass)klazz, doc);
210
230
  xmlDoc.setUrl(url);
211
231
  addErrorsIfNecessary(context, xmlDoc);
212
232
  return xmlDoc;
213
233
  } catch (SAXException e) {
214
- return getDocumentWithErrorsOrRaiseException(context, e);
234
+ return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e);
215
235
  } catch (IOException e) {
216
- return getDocumentWithErrorsOrRaiseException(context, e);
236
+ return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e);
217
237
  }
218
238
  }
219
239
 
@@ -17,7 +17,7 @@ end
17
17
  $CFLAGS << " #{ENV["CFLAGS"]}"
18
18
  $LIBS << " #{ENV["LIBS"]}"
19
19
 
20
- if RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin32/
20
+ if RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin/
21
21
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
22
22
  elsif RbConfig::CONFIG['target_os'] =~ /solaris/
23
23
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
@@ -30,10 +30,11 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
30
30
  end
31
31
 
32
32
  if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
33
- $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
33
+ $CFLAGS << " -O3" unless $CFLAGS[/-O\d/]
34
+ $CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
34
35
  end
35
36
 
36
- if RbConfig::CONFIG['target_os'] =~ /mswin32/
37
+ if RbConfig::CONFIG['target_os'] =~ /mswin/
37
38
  lib_prefix = 'lib'
38
39
 
39
40
  # There's no default include/lib dir on Windows. Let's just add the Ruby ones
@@ -80,6 +81,13 @@ else
80
81
  '/usr/local/include/libxml2',
81
82
  File.join(INCLUDEDIR, "libxml2")
82
83
  ] + HEADER_DIRS
84
+
85
+ # If the user has homebrew installed, use the libxml2 inside homebrew
86
+ brew_prefix = `brew --prefix libxml2 2> /dev/null`.chomp
87
+ unless brew_prefix.empty?
88
+ LIB_DIRS.unshift File.join(brew_prefix, 'lib')
89
+ XML2_HEADER_DIRS.unshift File.join(brew_prefix, 'include/libxml2')
90
+ end
83
91
  end
84
92
 
85
93
  dir_config('zlib', HEADER_DIRS, LIB_DIRS)
@@ -1,5 +1,7 @@
1
1
  #include <html_document.h>
2
2
 
3
+ static ID id_encoding_found;
4
+
3
5
  /*
4
6
  * call-seq:
5
7
  * new
@@ -56,6 +58,18 @@ static VALUE read_io( VALUE klass,
56
58
  );
57
59
  xmlSetStructuredErrorFunc(NULL, NULL);
58
60
 
61
+ /*
62
+ * If EncodingFound has occurred in EncodingReader, make sure to do
63
+ * a cleanup and propagate the error.
64
+ */
65
+ if (rb_respond_to(io, id_encoding_found)) {
66
+ VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
67
+ if (!NIL_P(encoding_found)) {
68
+ xmlFreeDoc(doc);
69
+ rb_exc_raise(encoding_found);
70
+ }
71
+ }
72
+
59
73
  if(doc == NULL) {
60
74
  xmlErrorPtr error;
61
75
 
@@ -151,4 +165,6 @@ void init_html_document()
151
165
  rb_define_singleton_method(klass, "new", new, -1);
152
166
 
153
167
  rb_define_method(klass, "type", type, 0);
168
+
169
+ id_encoding_found = rb_intern("encoding_found");
154
170
  }
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
13
13
  NOKOGIRI_DEBUG_END(handler);
14
14
  }
15
15
 
16
- static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding)
16
+ static VALUE
17
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
17
18
  {
18
- htmlParserCtxtPtr ctxt;
19
-
20
- if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil");
21
- if(!(int)RSTRING_LEN(data))
22
- rb_raise(rb_eRuntimeError, "data cannot be empty");
23
-
24
- ctxt = htmlCreateMemoryParserCtxt(
25
- StringValuePtr(data),
26
- (int)RSTRING_LEN(data)
27
- );
19
+ htmlParserCtxtPtr ctxt;
20
+
21
+ if (NIL_P(data))
22
+ rb_raise(rb_eArgError, "data cannot be nil");
23
+ if (!(int)RSTRING_LEN(data))
24
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
25
+
26
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
+ (int)RSTRING_LEN(data));
28
+ if (ctxt->sax) {
29
+ xmlFree(ctxt->sax);
30
+ ctxt->sax = NULL;
31
+ }
28
32
 
29
- if(RTEST(encoding)) {
30
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
31
- if(enc != NULL) {
32
- xmlSwitchToEncoding(ctxt, enc);
33
- if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
34
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
35
- StringValuePtr(encoding));
36
- }
33
+ if (RTEST(encoding)) {
34
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
35
+ if (enc != NULL) {
36
+ xmlSwitchToEncoding(ctxt, enc);
37
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
+ StringValuePtr(encoding));
40
+ }
41
+ }
37
42
  }
38
- }
39
43
 
40
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
44
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
41
45
  }
42
46
 
43
47
  static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
49
53
  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
50
54
  }
51
55
 
52
- static VALUE parse_with(VALUE self, VALUE sax_handler)
56
+ static VALUE
57
+ parse_doc(VALUE ctxt_val)
58
+ {
59
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
+ htmlParseDocument(ctxt);
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE
65
+ parse_doc_finalize(VALUE ctxt_val)
53
66
  {
54
- htmlParserCtxtPtr ctxt;
55
- htmlSAXHandlerPtr sax;
67
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
+
69
+ if (ctxt->myDoc)
70
+ xmlFreeDoc(ctxt->myDoc);
56
71
 
57
- if(!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
58
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
72
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
+ return Qnil;
74
+ }
75
+
76
+ static VALUE
77
+ parse_with(VALUE self, VALUE sax_handler)
78
+ {
79
+ htmlParserCtxtPtr ctxt;
80
+ htmlSAXHandlerPtr sax;
59
81
 
60
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
61
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
82
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
62
84
 
63
- /* Free the sax handler since we'll assign our own */
64
- if(ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
65
- xmlFree(ctxt->sax);
85
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
66
87
 
67
- ctxt->sax = sax;
68
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
88
+ /* Free the sax handler since we'll assign our own */
89
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
+ xmlFree(ctxt->sax);
69
91
 
70
- htmlParseDocument(ctxt);
92
+ ctxt->sax = sax;
93
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
71
94
 
72
- if(NULL != ctxt->myDoc) xmlFreeDoc(ctxt->myDoc);
95
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
73
96
 
74
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
75
- return self;
97
+ return self;
76
98
  }
77
99
 
78
100
  void init_html_sax_parser_context()
@@ -0,0 +1,87 @@
1
+ #include <html_sax_push_parser.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * native_write(chunk, last_chunk)
6
+ *
7
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
8
+ */
9
+ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char * chunk = NULL;
13
+ int size = 0;
14
+
15
+
16
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
17
+
18
+ if(Qnil != _chunk) {
19
+ chunk = StringValuePtr(_chunk);
20
+ size = (int)RSTRING_LEN(_chunk);
21
+ }
22
+
23
+ if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
+ if (!(ctx->options & XML_PARSE_RECOVER)) {
25
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
+ Nokogiri_error_raise(NULL, e);
27
+ }
28
+ }
29
+
30
+ return self;
31
+ }
32
+
33
+ /*
34
+ * call-seq:
35
+ * initialize_native(xml_sax, filename, encoding)
36
+ *
37
+ * Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
38
+ */
39
+ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
+ VALUE encoding)
41
+ {
42
+ htmlSAXHandlerPtr sax;
43
+ const char * filename = NULL;
44
+ htmlParserCtxtPtr ctx;
45
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
+
47
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
+
49
+ if(_filename != Qnil) filename = StringValuePtr(_filename);
50
+
51
+ if (!NIL_P(encoding)) {
52
+ enc = xmlParseCharEncoding(StringValuePtr(encoding));
53
+ if (enc == XML_CHAR_ENCODING_ERROR)
54
+ rb_raise(rb_eArgError, "Unsupported Encoding");
55
+ }
56
+
57
+ ctx = htmlCreatePushParserCtxt(
58
+ sax,
59
+ NULL,
60
+ NULL,
61
+ 0,
62
+ filename,
63
+ enc
64
+ );
65
+ if(ctx == NULL)
66
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
67
+
68
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
+
70
+ ctx->sax2 = 1;
71
+ DATA_PTR(self) = ctx;
72
+ return self;
73
+ }
74
+
75
+ VALUE cNokogiriHtmlSaxPushParser;
76
+ void init_html_sax_push_parser()
77
+ {
78
+ VALUE nokogiri = rb_define_module("Nokogiri");
79
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
80
+ VALUE sax = rb_define_module_under(html, "SAX");
81
+ VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
+
83
+ cNokogiriHtmlSaxPushParser = klass;
84
+
85
+ rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
+ rb_define_private_method(klass, "native_write", native_write, 2);
87
+ }