libxml-ruby 0.9.7 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. data/CHANGES +53 -0
  2. data/Rakefile +1 -0
  3. data/ext/libxml/build.log +4 -0
  4. data/ext/libxml/cbg.c +86 -86
  5. data/ext/libxml/libxml.c +878 -876
  6. data/ext/libxml/ruby_libxml.h +8 -4
  7. data/ext/libxml/ruby_xml_attr.c +36 -168
  8. data/ext/libxml/ruby_xml_attr.h +2 -4
  9. data/ext/libxml/ruby_xml_attr_decl.c +177 -0
  10. data/ext/libxml/ruby_xml_attr_decl.h +13 -0
  11. data/ext/libxml/ruby_xml_attributes.c +29 -20
  12. data/ext/libxml/ruby_xml_document.c +895 -898
  13. data/ext/libxml/ruby_xml_dtd.c +18 -1
  14. data/ext/libxml/ruby_xml_dtd.h +1 -0
  15. data/ext/libxml/ruby_xml_encoding.c +116 -0
  16. data/ext/libxml/ruby_xml_encoding.h +12 -0
  17. data/ext/libxml/ruby_xml_error.c +8 -2
  18. data/ext/libxml/ruby_xml_html_parser.c +53 -74
  19. data/ext/libxml/ruby_xml_html_parser.h +2 -3
  20. data/ext/libxml/ruby_xml_html_parser_context.c +145 -0
  21. data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
  22. data/ext/libxml/ruby_xml_html_parser_options.c +48 -0
  23. data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
  24. data/ext/libxml/ruby_xml_input_cbg.c +1 -1
  25. data/ext/libxml/ruby_xml_io.c +30 -0
  26. data/ext/libxml/ruby_xml_io.h +9 -0
  27. data/ext/libxml/ruby_xml_namespace.c +34 -16
  28. data/ext/libxml/ruby_xml_namespace.h +2 -2
  29. data/ext/libxml/ruby_xml_namespaces.c +6 -6
  30. data/ext/libxml/ruby_xml_node.c +1367 -1324
  31. data/ext/libxml/ruby_xml_node.h +2 -2
  32. data/ext/libxml/ruby_xml_parser.c +26 -78
  33. data/ext/libxml/ruby_xml_parser.h +1 -1
  34. data/ext/libxml/ruby_xml_parser_context.c +284 -13
  35. data/ext/libxml/ruby_xml_parser_context.h +1 -2
  36. data/ext/libxml/ruby_xml_parser_options.c +75 -0
  37. data/ext/libxml/ruby_xml_parser_options.h +14 -0
  38. data/ext/libxml/ruby_xml_reader.c +277 -183
  39. data/ext/libxml/ruby_xml_sax_parser.c +60 -57
  40. data/ext/libxml/ruby_xml_xpath_context.c +43 -8
  41. data/ext/libxml/ruby_xml_xpath_expression.c +6 -0
  42. data/ext/libxml/ruby_xml_xpath_object.c +107 -95
  43. data/ext/libxml/ruby_xml_xpath_object.h +9 -1
  44. data/ext/libxml/ruby_xml_xpointer.c +107 -107
  45. data/ext/libxml/version.h +2 -2
  46. data/ext/vc/libxml_ruby.vcproj +43 -3
  47. data/lib/libxml.rb +2 -3
  48. data/lib/libxml/attr.rb +71 -2
  49. data/lib/libxml/attr_decl.rb +81 -0
  50. data/lib/libxml/document.rb +78 -14
  51. data/lib/libxml/html_parser.rb +75 -42
  52. data/lib/libxml/node.rb +11 -0
  53. data/lib/libxml/parser.rb +106 -62
  54. data/lib/libxml/reader.rb +12 -0
  55. data/lib/libxml/sax_parser.rb +42 -52
  56. data/lib/libxml/xpath_object.rb +15 -0
  57. data/test/model/atom.xml +12 -12
  58. data/test/model/bands.xml +4 -4
  59. data/test/model/books.xml +146 -147
  60. data/test/model/merge_bug_data.xml +1 -1
  61. data/test/model/rubynet.xml +1 -0
  62. data/test/model/shiporder.rng +1 -1
  63. data/test/model/shiporder.xml +22 -22
  64. data/test/model/shiporder.xsd +30 -30
  65. data/test/model/xinclude.xml +1 -1
  66. data/test/{tc_node_attr.rb → tc_attr.rb} +1 -1
  67. data/test/tc_attr_decl.rb +131 -0
  68. data/test/tc_deprecated_require.rb +1 -3
  69. data/test/tc_document.rb +13 -3
  70. data/test/tc_document_write.rb +5 -5
  71. data/test/tc_dtd.rb +13 -5
  72. data/test/tc_html_parser.rb +14 -26
  73. data/test/tc_node_cdata.rb +1 -3
  74. data/test/tc_node_comment.rb +2 -4
  75. data/test/tc_node_edit.rb +2 -3
  76. data/test/tc_node_text.rb +35 -1
  77. data/test/tc_node_write.rb +3 -3
  78. data/test/tc_node_xlink.rb +2 -4
  79. data/test/tc_parser.rb +163 -70
  80. data/test/tc_parser_context.rb +103 -42
  81. data/test/tc_reader.rb +173 -45
  82. data/test/tc_relaxng.rb +2 -2
  83. data/test/tc_sax_parser.rb +48 -52
  84. data/test/tc_schema.rb +2 -2
  85. data/test/tc_xpath.rb +37 -6
  86. data/test/tc_xpath_context.rb +7 -1
  87. data/test/tc_xpath_expression.rb +1 -3
  88. data/test/tc_xpointer.rb +1 -3
  89. data/test/test_suite.rb +2 -3
  90. metadata +20 -13
  91. data/ext/libxml/ruby_xml_input.c +0 -329
  92. data/ext/libxml/ruby_xml_input.h +0 -20
  93. data/lib/libxml/parser_context.rb +0 -17
  94. data/lib/libxml/parser_options.rb +0 -25
  95. data/test/model/simple.xml +0 -7
  96. data/test/tc_input.rb +0 -13
  97. data/test/tc_well_formed.rb +0 -11
@@ -40,6 +40,23 @@ static VALUE rxml_dtd_alloc(VALUE klass)
40
40
  return Data_Wrap_Struct(klass, NULL, rxml_dtd_free, NULL);
41
41
  }
42
42
 
43
+ VALUE rxml_dtd_wrap(xmlDtdPtr xdtd)
44
+ {
45
+ VALUE result;
46
+
47
+ // This node is already wrapped
48
+ if (xdtd->_private != NULL)
49
+ return (VALUE) xdtd->_private;
50
+
51
+ result = Data_Wrap_Struct(cXMLDtd, NULL, NULL, xdtd);
52
+
53
+ xdtd->_private = (void*) result;
54
+
55
+ return result;
56
+ }
57
+
58
+
59
+
43
60
  /*
44
61
  * call-seq:
45
62
  * XML::Dtd.new("public system") -> dtd
@@ -72,7 +89,7 @@ static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
72
89
  if (xdtd == NULL)
73
90
  rxml_raise(&xmlLastError);
74
91
 
75
- DATA_PTR( self) = xdtd;
92
+ DATA_PTR(self) = xdtd;
76
93
 
77
94
  xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
78
95
  break;
@@ -4,5 +4,6 @@
4
4
  extern VALUE cXMLDtd;
5
5
 
6
6
  void ruby_init_xml_dtd(void);
7
+ VALUE rxml_dtd_wrap(xmlDtdPtr xdtd);
7
8
 
8
9
  #endif
@@ -0,0 +1,116 @@
1
+ /* $Id: rxml_input.c 528 2008-11-15 23:43:48Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include <stdarg.h>
6
+ #include "ruby_libxml.h"
7
+
8
+ /*
9
+ * Document-class: LibXML::XML::Encoding
10
+ *
11
+ * The encoding class defines the encodings that libxml
12
+ * supports. LibXML converts all data sources to UTF8
13
+ * internally before processing them.
14
+ *
15
+ * By default, LibXML determines a data source's encoding
16
+ * using the algorithm described on its
17
+ * website[* http://xmlsoft.org/encoding.html].
18
+ *
19
+ * However, you may override a data source's encoding
20
+ * by using the encoding constants defined in this
21
+ * module.
22
+ *
23
+ * Example 1:
24
+ *
25
+ * io = File.open('some_file', 'rb')
26
+ * parser = XML::Parser.io(io, :encoding => XML::Encoding::ISO_8859_1)
27
+ * doc = parser.parse
28
+ *
29
+ * Example 2:
30
+ *
31
+ * parser = XML::HTMLParser.file("some_file", :encoding => XML::Encoding::ISO_8859_1)
32
+ * doc = parser.parse
33
+ *
34
+ * Example 3:
35
+ *
36
+ * document = XML::Document.new
37
+ * document.encoding = XML::Encoding::ISO_8859_1
38
+ * doc << XML::Node.new
39
+ */
40
+
41
+ VALUE mXMLEncoding;
42
+
43
+
44
+ /*
45
+ * call-seq:
46
+ * XML::Encoding.from_s("UTF_8") -> XML::Encoding::UTF_8
47
+ *
48
+ * Converts an encoding string to an encoding constant
49
+ * defined on the XML::Encoding class.
50
+ */
51
+ static VALUE rxml_encoding_from_s(VALUE klass, VALUE encoding)
52
+ {
53
+ xmlCharEncoding xencoding;
54
+
55
+ if (encoding == Qnil)
56
+ return Qnil;
57
+
58
+ xencoding = xmlParseCharEncoding(StringValuePtr(encoding));
59
+ return NUM2INT(xencoding);
60
+ }
61
+
62
+ /*
63
+ * call-seq:
64
+ * XML::Encoding.to_s(XML::Encoding::ENCODING) -> "encoding"
65
+ *
66
+ * Converts an encoding constant defined on the XML::Encoding
67
+ * class to its text representation.
68
+ */
69
+ static VALUE rxml_encoding_to_s(VALUE klass, VALUE encoding)
70
+ {
71
+ const char* xecoding = xmlGetCharEncodingName(NUM2INT(encoding));
72
+
73
+ if (!xecoding)
74
+ return Qnil;
75
+ else
76
+ return rb_str_new2(xecoding);
77
+ }
78
+
79
+ // Rdoc needs to know
80
+ #ifdef RDOC_NEVER_DEFINED
81
+ mLibXML = rb_define_module("LibXML");
82
+ mXML = rb_define_module_under(mLibXML, "XML");
83
+ #endif
84
+
85
+ void ruby_init_xml_encoding(void)
86
+ {
87
+ mXMLEncoding = rb_define_module_under(mXML, "Encoding");
88
+ rb_define_module_function(mXMLEncoding, "from_s", rxml_encoding_from_s, 1);
89
+ rb_define_module_function(mXMLEncoding, "to_s", rxml_encoding_to_s, 1);
90
+
91
+ rb_define_const(mXMLEncoding, "UNDEFINED", INT2NUM(XPATH_UNDEFINED));
92
+ rb_define_const(mXMLEncoding, "ERROR", INT2NUM(XML_CHAR_ENCODING_ERROR)); /* No char encoding detected */
93
+ rb_define_const(mXMLEncoding, "NONE", INT2NUM(XML_CHAR_ENCODING_NONE)); /* No char encoding detected */
94
+ rb_define_const(mXMLEncoding, "UTF_8", INT2NUM(XML_CHAR_ENCODING_UTF8)); /* UTF-8 */
95
+ rb_define_const(mXMLEncoding, "UTF_16LE", INT2NUM(XML_CHAR_ENCODING_UTF16LE)); /* UTF-16 little endian */
96
+ rb_define_const(mXMLEncoding, "UTF_16BE", INT2NUM(XML_CHAR_ENCODING_UTF16BE)); /* UTF-16 big endian */
97
+ rb_define_const(mXMLEncoding, "UCS_4LE", INT2NUM(XML_CHAR_ENCODING_UCS4LE)); /* UCS-4 little endian */
98
+ rb_define_const(mXMLEncoding, "UCS_4BE", INT2NUM(XML_CHAR_ENCODING_UCS4BE)); /* UCS-4 big endian */
99
+ rb_define_const(mXMLEncoding, "EBCDIC", INT2NUM(XML_CHAR_ENCODING_EBCDIC)); /* EBCDIC uh! */
100
+ rb_define_const(mXMLEncoding, "UCS_4_2143", INT2NUM(XML_CHAR_ENCODING_UCS4_2143)); /* UCS-4 unusual ordering */
101
+ rb_define_const(mXMLEncoding, "UCS_4_3412", INT2NUM(XML_CHAR_ENCODING_UCS4_3412)); /* UCS-4 unusual ordering */
102
+ rb_define_const(mXMLEncoding, "UCS_2", INT2NUM(XML_CHAR_ENCODING_UCS2)); /* UCS-2 */
103
+ rb_define_const(mXMLEncoding, "ISO_8859_1", INT2NUM(XML_CHAR_ENCODING_8859_1)); /* ISO-8859-1 ISO Latin 1 */
104
+ rb_define_const(mXMLEncoding, "ISO_8859_2", INT2NUM(XML_CHAR_ENCODING_8859_2)); /* ISO-8859-2 ISO Latin 2 */
105
+ rb_define_const(mXMLEncoding, "ISO_8859_3", INT2NUM(XML_CHAR_ENCODING_8859_3)); /* ISO-8859-3 */
106
+ rb_define_const(mXMLEncoding, "ISO_8859_4", INT2NUM(XML_CHAR_ENCODING_8859_4)); /* ISO-8859-4 */
107
+ rb_define_const(mXMLEncoding, "ISO_8859_5", INT2NUM(XML_CHAR_ENCODING_8859_5)); /* ISO-8859-5 */
108
+ rb_define_const(mXMLEncoding, "ISO_8859_6", INT2NUM(XML_CHAR_ENCODING_8859_6)); /* ISO-8859-6 */
109
+ rb_define_const(mXMLEncoding, "ISO_8859_7", INT2NUM(XML_CHAR_ENCODING_8859_7)); /* ISO-8859-7 */
110
+ rb_define_const(mXMLEncoding, "ISO_8859_8", INT2NUM(XML_CHAR_ENCODING_8859_8)); /* ISO-8859-8 */
111
+ rb_define_const(mXMLEncoding, "ISO_8859_9", INT2NUM(XML_CHAR_ENCODING_8859_9)); /* ISO-8859-9 */
112
+ rb_define_const(mXMLEncoding, "ISO_2022_JP", INT2NUM(XML_CHAR_ENCODING_2022_JP)); /* ISO-2022-JP */
113
+ rb_define_const(mXMLEncoding, "SHIFT_JIS", INT2NUM(XML_CHAR_ENCODING_SHIFT_JIS)); /* Shift_JIS */
114
+ rb_define_const(mXMLEncoding, "EUC_JP", INT2NUM(XML_CHAR_ENCODING_EUC_JP)); /* EUC-JP */
115
+ rb_define_const(mXMLEncoding, "ASCII", INT2NUM(XML_CHAR_ENCODING_ASCII)); /* pure ASCII */
116
+ }
@@ -0,0 +1,12 @@
1
+ /* $Id: rxml_parser.h 39 2006-02-21 20:40:16Z roscopeco $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_ENCODING__
6
+ #define __RXML_ENCODING__
7
+
8
+ extern VALUE mXMLEncoding;
9
+
10
+ void ruby_init_xml_encoding();
11
+
12
+ #endif
@@ -108,10 +108,16 @@ VALUE rxml_error_wrap(xmlErrorPtr xerror)
108
108
  rb_iv_set(result, "@int1", INT2NUM(xerror->int1));
109
109
  rb_iv_set(result, "@int2", INT2NUM(xerror->int2));
110
110
 
111
- //rb_define_attr(eXMLError, "ctxt", 1, 0);
112
111
  if (xerror->node)
113
112
  {
114
- VALUE node = rxml_node_wrap(cXMLNode, xerror->node);
113
+ /* Returning the original node is too dangerous because its
114
+ parent document is never returned to Ruby. So return a
115
+ copy of the node, which does not belong to any document,
116
+ and can free itself when Ruby calls its free method. Note
117
+ we just copy the node, and don't bother with the overhead
118
+ of a recursive query. */
119
+ xmlNodePtr xNode = xmlCopyNode((const xmlNodePtr)xerror->node, 2);
120
+ VALUE node = rxml_node_wrap(xNode);
115
121
  rb_iv_set(result, "@node", node);
116
122
  }
117
123
  return result;
@@ -1,78 +1,60 @@
1
- /* $Id: ruby_xml_html_parser.c 665 2008-12-06 07:52:49Z cfis $ */
1
+ /* $Id: ruby_xml_html_parser.c 737 2009-01-23 01:23:08Z walltndr $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
5
5
  #include "ruby_libxml.h"
6
6
 
7
- VALUE cXMLHTMLParser;
8
- static ID INPUT_ATTR;
9
-
10
- /*
11
- * Document-class: LibXML::XML::HTMLParser
7
+ /* Document-class: LibXML::XML::HTMLParser
12
8
  *
13
9
  * The HTML parser implements an HTML 4.0 non-verifying parser with an API
14
10
  * compatible with the XML::Parser. In contrast with the XML::Parser,
15
11
  * it can parse "real world" HTML, even if it is severely broken from a
16
- * specification point of view. */
12
+ * specification point of view.
13
+ *
14
+ * The HTML parser creates an in-memory document object
15
+ * that consists of any number of XML::Node instances. This is a simple
16
+ * and powerful model, but has the major limitation that the size of
17
+ * the document that can be processed is limited by the amount of
18
+ * memory available.
19
+ *
20
+ * Using the html parser is simple:
21
+ *
22
+ * parser = XML::HTMLParser.file('my_file')
23
+ * doc = parser.parse
24
+ *
25
+ * You can also parse documents (see XML::HTMLParser.document),
26
+ * strings (see XML::HTMLParser.string) and io objects (see
27
+ * XML::HTMLParser.io).
28
+ */
17
29
 
18
- /*
19
- * call-seq:
30
+ VALUE cXMLHtmlParser;
31
+ static ID CONTEXT_ATTR;
32
+
33
+
34
+ /* call-seq:
20
35
  * XML::HTMLParser.initialize -> parser
21
36
  *
22
37
  * Initializes a new parser instance with no pre-determined source.
23
38
  */
24
- static VALUE rxml_html_parser_initialize(VALUE self)
39
+ static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
25
40
  {
26
- VALUE input = rb_class_new_instance(0, NULL, cXMLInput);
27
- rb_iv_set(self, "@input", input);
28
- return self;
29
- }
41
+ VALUE context = Qnil;
30
42
 
31
- static htmlDocPtr rxml_html_parser_read_file(VALUE input)
32
- {
33
- VALUE file = rb_ivar_get(input, FILE_ATTR);
34
- VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
35
- VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
36
- char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
37
- encoding_str));
38
- int options = 0;
39
-
40
- return htmlReadFile(StringValuePtr(file), xencoding_str, options);
41
- }
43
+ rb_scan_args(argc, argv, "01", &context);
42
44
 
43
- static htmlDocPtr rxml_html_parser_read_string(VALUE input)
44
- {
45
- VALUE string = rb_ivar_get(input, STRING_ATTR);
46
- VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
47
- char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
48
- VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
49
- VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
50
- char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
51
- encoding_str));
52
- int options = 0;
53
-
54
- return htmlReadMemory(StringValuePtr(string), RSTRING_LEN(string),
55
- xbase_url, xencoding_str, options);
56
- }
45
+ if (context == Qnil)
46
+ {
47
+ rb_warn("Passing no parameters to XML::HTMLParser.new is deprecated. Pass an instance of XML::Parser::Context instead.");
48
+ context = rb_class_new_instance(0, NULL, cXMLParserContext);
49
+ }
57
50
 
58
- static htmlDocPtr rxml_html_parser_read_io(VALUE input)
59
- {
60
- VALUE io = rb_ivar_get(input, IO_ATTR);
61
- VALUE base_url = rb_ivar_get(input, BASE_URL_ATTR);
62
- char *xbase_url = (base_url == Qnil ? NULL : StringValuePtr(base_url));
63
- VALUE encoding = rb_ivar_get(input, ENCODING_ATTR);
64
- VALUE encoding_str = rxml_input_encoding_to_s(cXMLInput, encoding);
65
- char *xencoding_str = (encoding_str == Qnil ? NULL : StringValuePtr(
66
- encoding_str));
67
- int options = 0;
68
-
69
- return htmlReadIO((xmlInputReadCallback) rxml_read_callback, NULL,
70
- (void *) io, xbase_url, xencoding_str, options);
51
+ rb_ivar_set(self, CONTEXT_ATTR, context);
52
+ return self;
71
53
  }
72
54
 
73
55
  /*
74
56
  * call-seq:
75
- * parser.parse -> document
57
+ * parser.parse -> XML::Document
76
58
  *
77
59
  * Parse the input XML and create an XML::Document with
78
60
  * its content. If an error occurs, XML::Parser::ParseError
@@ -80,22 +62,19 @@ static htmlDocPtr rxml_html_parser_read_io(VALUE input)
80
62
  */
81
63
  static VALUE rxml_html_parser_parse(VALUE self)
82
64
  {
83
- VALUE input = rb_ivar_get(self, INPUT_ATTR);
84
- htmlDocPtr xdoc;
85
-
86
- if (rb_ivar_get(input, FILE_ATTR) != Qnil)
87
- xdoc = rxml_html_parser_read_file(input);
88
- else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
89
- xdoc = rxml_html_parser_read_string(input);
90
- else if (rb_ivar_get(input, IO_ATTR) != Qnil)
91
- xdoc = rxml_html_parser_read_io(input);
92
- else
93
- rb_raise(rb_eArgError, "You must specify a parser data source");
94
-
95
- if (!xdoc)
96
- rxml_raise(&xmlLastError);
97
-
98
- return rxml_document_wrap(xdoc);
65
+ xmlParserCtxtPtr ctxt;
66
+ VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
67
+
68
+ Data_Get_Struct(context, xmlParserCtxt, ctxt);
69
+
70
+ if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
71
+ {
72
+ if (ctxt->myDoc)
73
+ xmlFreeDoc(ctxt->myDoc);
74
+ rxml_raise(&ctxt->lastError);
75
+ }
76
+
77
+ return rxml_document_wrap(ctxt->myDoc);
99
78
  }
100
79
 
101
80
  // Rdoc needs to know
@@ -106,14 +85,14 @@ mXML = rb_define_module_under(mLibXML, "XML");
106
85
 
107
86
  void ruby_init_html_parser(void)
108
87
  {
109
- INPUT_ATTR = rb_intern("@input");
88
+ CONTEXT_ATTR = rb_intern("@context");
110
89
 
111
- cXMLHTMLParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
90
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
112
91
 
113
92
  /* Attributes */
114
- rb_define_attr(cXMLHTMLParser, "input", 1, 0);
93
+ rb_define_attr(cXMLHtmlParser, "input", 1, 0);
115
94
 
116
95
  /* Instance methods */
117
- rb_define_method(cXMLHTMLParser, "initialize", rxml_html_parser_initialize, 0);
118
- rb_define_method(cXMLHTMLParser, "parse", rxml_html_parser_parse, 0);
96
+ rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
97
+ rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
119
98
  }
@@ -1,12 +1,11 @@
1
- /* $Id: ruby_xml_html_parser.h 666 2008-12-07 00:16:50Z cfis $ */
1
+ /* $Id: ruby_xml_html_parser.h 711 2009-01-20 07:17:15Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
5
5
  #ifndef __RXML_HTML_PARSER__
6
6
  #define __RXML_HTML_PARSER__
7
7
 
8
- extern int rxml_html_parser_count;
9
- extern VALUE cXMLHTMLParser;
8
+ extern VALUE cXMLHtmlParser;
10
9
 
11
10
  void ruby_init_html_parser(void);
12
11
 
@@ -0,0 +1,145 @@
1
+ /* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+ #include "ruby_xml_html_parser_context.h"
7
+
8
+ /*
9
+ * Document-class: LibXML::XML::HTMLParser::Context
10
+ *
11
+ * The XML::HTMLParser::Context class provides in-depth control over how
12
+ * a document is parsed.
13
+ */
14
+
15
+ VALUE cXMLHtmlParserContext;
16
+
17
+ static void rxml_html_parser_context_free(xmlParserCtxtPtr ctxt)
18
+ {
19
+ xmlFreeParserCtxt(ctxt);
20
+ }
21
+
22
+ static VALUE rxml_html_parser_context_wrap(xmlParserCtxtPtr ctxt)
23
+ {
24
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
25
+ }
26
+
27
+ /* call-seq:
28
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
29
+ *
30
+ * Creates a new parser context based on the specified file or uri.
31
+ *
32
+ * Parameters:
33
+ *
34
+ * file - A filename or uri.
35
+ */
36
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
37
+ {
38
+ xmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
39
+ return rxml_html_parser_context_wrap(ctxt);
40
+ }
41
+
42
+ /* call-seq:
43
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
44
+ *
45
+ * Creates a new parser context based on the specified io object.
46
+ *
47
+ * Parameters:
48
+ *
49
+ * io - A ruby IO object.
50
+ */
51
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
52
+ {
53
+ htmlParserCtxtPtr ctxt;
54
+ xmlParserInputBufferPtr input;
55
+ xmlParserInputPtr stream;
56
+
57
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
58
+ (void*)io, XML_CHAR_ENCODING_NONE);
59
+
60
+ ctxt = htmlNewParserCtxt();
61
+ if (!ctxt)
62
+ {
63
+ xmlFreeParserInputBuffer(input);
64
+ rxml_raise(&xmlLastError);
65
+ }
66
+
67
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
68
+
69
+ if (!stream)
70
+ {
71
+ xmlFreeParserInputBuffer(input);
72
+ xmlFreeParserCtxt(ctxt);
73
+ rxml_raise(&xmlLastError);
74
+ }
75
+ inputPush(ctxt, stream);
76
+
77
+ return rxml_html_parser_context_wrap(ctxt);
78
+ }
79
+
80
+ /* call-seq:
81
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
82
+ *
83
+ * Creates a new parser context based on the specified string.
84
+ *
85
+ * Parameters:
86
+ *
87
+ * string - A string that contains the data to parse.
88
+ */
89
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
90
+ {
91
+ xmlParserCtxtPtr ctxt;
92
+ Check_Type(string, T_STRING);
93
+
94
+ if (RSTRING_LEN(string) == 0)
95
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
96
+
97
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
98
+ RSTRING_LEN(string));
99
+ if (!ctxt)
100
+ rxml_raise(&xmlLastError);
101
+
102
+ htmlDefaultSAXHandlerInit();
103
+ if (ctxt->sax != NULL)
104
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
105
+
106
+ return rxml_html_parser_context_wrap(ctxt);
107
+ }
108
+
109
+ /*
110
+ * call-seq:
111
+ * context.options = XML::Parser::Options::NOENT |
112
+ XML::Parser::Options::NOCDATA
113
+ *
114
+ * Provides control over the execution of a parser. Valid values
115
+ * are the constants defined on XML::Parser::Options. Multiple
116
+ * options can be combined by using Bitwise OR (|).
117
+ */
118
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
119
+ {
120
+ int result;
121
+ xmlParserCtxtPtr ctxt;
122
+ Check_Type(options, T_FIXNUM);
123
+
124
+ Data_Get_Struct(self, xmlParserCtxt, ctxt);
125
+ result = htmlCtxtUseOptions(ctxt, NUM2INT(options));
126
+
127
+ return self;
128
+ }
129
+
130
+ // Rdoc needs to know
131
+ #ifdef RDOC_NEVER_DEFINED
132
+ mLibXML = rb_define_module("LibXML");
133
+ mXML = rb_define_module_under(mLibXML, "XML");
134
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
135
+ #endif
136
+
137
+ void ruby_init_html_parser_context(void)
138
+ {
139
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
140
+
141
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
142
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
143
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
144
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
145
+ }