libxml-ruby 2.0.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. data/HISTORY +516 -0
  2. data/LICENSE +23 -0
  3. data/MANIFEST +165 -0
  4. data/README.rdoc +161 -0
  5. data/Rakefile +82 -0
  6. data/ext/libxml/extconf.rb +122 -0
  7. data/ext/libxml/libxml.c +93 -0
  8. data/ext/libxml/ruby_libxml.h +101 -0
  9. data/ext/libxml/ruby_xml.c +893 -0
  10. data/ext/libxml/ruby_xml.h +10 -0
  11. data/ext/libxml/ruby_xml_attr.c +352 -0
  12. data/ext/libxml/ruby_xml_attr.h +14 -0
  13. data/ext/libxml/ruby_xml_attr_decl.c +171 -0
  14. data/ext/libxml/ruby_xml_attr_decl.h +13 -0
  15. data/ext/libxml/ruby_xml_attributes.c +277 -0
  16. data/ext/libxml/ruby_xml_attributes.h +17 -0
  17. data/ext/libxml/ruby_xml_cbg.c +85 -0
  18. data/ext/libxml/ruby_xml_document.c +958 -0
  19. data/ext/libxml/ruby_xml_document.h +17 -0
  20. data/ext/libxml/ruby_xml_dtd.c +257 -0
  21. data/ext/libxml/ruby_xml_dtd.h +9 -0
  22. data/ext/libxml/ruby_xml_encoding.c +221 -0
  23. data/ext/libxml/ruby_xml_encoding.h +16 -0
  24. data/ext/libxml/ruby_xml_error.c +1004 -0
  25. data/ext/libxml/ruby_xml_error.h +14 -0
  26. data/ext/libxml/ruby_xml_html_parser.c +92 -0
  27. data/ext/libxml/ruby_xml_html_parser.h +12 -0
  28. data/ext/libxml/ruby_xml_html_parser_context.c +308 -0
  29. data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
  30. data/ext/libxml/ruby_xml_html_parser_options.c +40 -0
  31. data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
  32. data/ext/libxml/ruby_xml_input_cbg.c +191 -0
  33. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  34. data/ext/libxml/ruby_xml_io.c +30 -0
  35. data/ext/libxml/ruby_xml_io.h +9 -0
  36. data/ext/libxml/ruby_xml_namespace.c +170 -0
  37. data/ext/libxml/ruby_xml_namespace.h +12 -0
  38. data/ext/libxml/ruby_xml_namespaces.c +295 -0
  39. data/ext/libxml/ruby_xml_namespaces.h +11 -0
  40. data/ext/libxml/ruby_xml_node.c +1386 -0
  41. data/ext/libxml/ruby_xml_node.h +13 -0
  42. data/ext/libxml/ruby_xml_parser.c +94 -0
  43. data/ext/libxml/ruby_xml_parser.h +14 -0
  44. data/ext/libxml/ruby_xml_parser_context.c +982 -0
  45. data/ext/libxml/ruby_xml_parser_context.h +12 -0
  46. data/ext/libxml/ruby_xml_parser_options.c +68 -0
  47. data/ext/libxml/ruby_xml_parser_options.h +14 -0
  48. data/ext/libxml/ruby_xml_reader.c +1057 -0
  49. data/ext/libxml/ruby_xml_reader.h +14 -0
  50. data/ext/libxml/ruby_xml_relaxng.c +111 -0
  51. data/ext/libxml/ruby_xml_relaxng.h +10 -0
  52. data/ext/libxml/ruby_xml_sax2_handler.c +334 -0
  53. data/ext/libxml/ruby_xml_sax2_handler.h +12 -0
  54. data/ext/libxml/ruby_xml_sax_parser.c +136 -0
  55. data/ext/libxml/ruby_xml_sax_parser.h +12 -0
  56. data/ext/libxml/ruby_xml_schema.c +159 -0
  57. data/ext/libxml/ruby_xml_schema.h +11 -0
  58. data/ext/libxml/ruby_xml_version.h +9 -0
  59. data/ext/libxml/ruby_xml_xinclude.c +18 -0
  60. data/ext/libxml/ruby_xml_xinclude.h +13 -0
  61. data/ext/libxml/ruby_xml_xpath.c +107 -0
  62. data/ext/libxml/ruby_xml_xpath.h +12 -0
  63. data/ext/libxml/ruby_xml_xpath_context.c +390 -0
  64. data/ext/libxml/ruby_xml_xpath_context.h +11 -0
  65. data/ext/libxml/ruby_xml_xpath_expression.c +83 -0
  66. data/ext/libxml/ruby_xml_xpath_expression.h +12 -0
  67. data/ext/libxml/ruby_xml_xpath_object.c +336 -0
  68. data/ext/libxml/ruby_xml_xpath_object.h +19 -0
  69. data/ext/libxml/ruby_xml_xpointer.c +101 -0
  70. data/ext/libxml/ruby_xml_xpointer.h +13 -0
  71. data/ext/mingw/Rakefile +34 -0
  72. data/ext/mingw/build.rake +41 -0
  73. data/ext/vc/libxml_ruby.sln +26 -0
  74. data/lib/1.8/libxml_ruby.so +0 -0
  75. data/lib/1.9/libxml_ruby.so +0 -0
  76. data/lib/libxml.rb +30 -0
  77. data/lib/libxml/attr.rb +113 -0
  78. data/lib/libxml/attr_decl.rb +80 -0
  79. data/lib/libxml/attributes.rb +14 -0
  80. data/lib/libxml/document.rb +192 -0
  81. data/lib/libxml/error.rb +90 -0
  82. data/lib/libxml/hpricot.rb +78 -0
  83. data/lib/libxml/html_parser.rb +96 -0
  84. data/lib/libxml/namespace.rb +62 -0
  85. data/lib/libxml/namespaces.rb +38 -0
  86. data/lib/libxml/node.rb +399 -0
  87. data/lib/libxml/ns.rb +22 -0
  88. data/lib/libxml/parser.rb +367 -0
  89. data/lib/libxml/properties.rb +23 -0
  90. data/lib/libxml/reader.rb +29 -0
  91. data/lib/libxml/sax_callbacks.rb +180 -0
  92. data/lib/libxml/sax_parser.rb +58 -0
  93. data/lib/libxml/tree.rb +29 -0
  94. data/lib/libxml/xpath_object.rb +16 -0
  95. data/lib/xml.rb +16 -0
  96. data/lib/xml/libxml.rb +10 -0
  97. data/libxml-ruby.gemspec +50 -0
  98. data/script/benchmark/depixelate +634 -0
  99. data/script/benchmark/hamlet.xml +9055 -0
  100. data/script/benchmark/parsecount +170 -0
  101. data/script/benchmark/sock_entries.xml +507 -0
  102. data/script/benchmark/throughput +41 -0
  103. data/script/test +6 -0
  104. data/setup.rb +1585 -0
  105. data/test/etc_doc_to_s.rb +21 -0
  106. data/test/ets_doc_file.rb +17 -0
  107. data/test/ets_doc_to_s.rb +23 -0
  108. data/test/ets_gpx.rb +28 -0
  109. data/test/ets_node_gc.rb +23 -0
  110. data/test/ets_test.xml +2 -0
  111. data/test/ets_tsr.rb +11 -0
  112. data/test/model/atom.xml +13 -0
  113. data/test/model/bands.iso-8859-1.xml +5 -0
  114. data/test/model/bands.utf-8.xml +5 -0
  115. data/test/model/bands.xml +5 -0
  116. data/test/model/books.xml +146 -0
  117. data/test/model/merge_bug_data.xml +58 -0
  118. data/test/model/ruby-lang.html +238 -0
  119. data/test/model/rubynet.xml +79 -0
  120. data/test/model/rubynet_project +1 -0
  121. data/test/model/shiporder.rnc +28 -0
  122. data/test/model/shiporder.rng +86 -0
  123. data/test/model/shiporder.xml +23 -0
  124. data/test/model/shiporder.xsd +31 -0
  125. data/test/model/soap.xml +27 -0
  126. data/test/model/xinclude.xml +5 -0
  127. data/test/rb-magic-comment.rb +33 -0
  128. data/test/tc_attr.rb +181 -0
  129. data/test/tc_attr_decl.rb +133 -0
  130. data/test/tc_attributes.rb +135 -0
  131. data/test/tc_deprecated_require.rb +13 -0
  132. data/test/tc_document.rb +119 -0
  133. data/test/tc_document_write.rb +187 -0
  134. data/test/tc_dtd.rb +125 -0
  135. data/test/tc_error.rb +138 -0
  136. data/test/tc_html_parser.rb +140 -0
  137. data/test/tc_namespace.rb +62 -0
  138. data/test/tc_namespaces.rb +177 -0
  139. data/test/tc_node.rb +258 -0
  140. data/test/tc_node_cdata.rb +51 -0
  141. data/test/tc_node_comment.rb +33 -0
  142. data/test/tc_node_copy.rb +42 -0
  143. data/test/tc_node_edit.rb +160 -0
  144. data/test/tc_node_text.rb +71 -0
  145. data/test/tc_node_write.rb +108 -0
  146. data/test/tc_node_xlink.rb +29 -0
  147. data/test/tc_parser.rb +336 -0
  148. data/test/tc_parser_context.rb +189 -0
  149. data/test/tc_properties.rb +39 -0
  150. data/test/tc_reader.rb +298 -0
  151. data/test/tc_relaxng.rb +54 -0
  152. data/test/tc_sax_parser.rb +276 -0
  153. data/test/tc_schema.rb +53 -0
  154. data/test/tc_traversal.rb +222 -0
  155. data/test/tc_xinclude.rb +21 -0
  156. data/test/tc_xml.rb +226 -0
  157. data/test/tc_xpath.rb +195 -0
  158. data/test/tc_xpath_context.rb +80 -0
  159. data/test/tc_xpath_expression.rb +38 -0
  160. data/test/tc_xpointer.rb +74 -0
  161. data/test/test_helper.rb +14 -0
  162. data/test/test_suite.rb +39 -0
  163. metadata +254 -0
@@ -0,0 +1,14 @@
1
+ /* $Id: rxml_ns.h 324 2008-07-08 23:00:02Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_ERROR__
6
+ #define __RXML_ERROR__
7
+
8
+ extern VALUE eXMLError;
9
+
10
+ void rxml_init_error();
11
+ VALUE rxml_error_wrap(xmlErrorPtr xerror);
12
+ void rxml_raise(xmlErrorPtr xerror);
13
+
14
+ #endif
@@ -0,0 +1,92 @@
1
+ /* $Id$ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ /* Document-class: LibXML::XML::HTMLParser
8
+ *
9
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
10
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
11
+ * it can parse "real world" HTML, even if it is severely broken from a
12
+ * specification point of view.
13
+ *
14
+ * The HTML parser creates an in-memory document object
15
+ * that consists of any number of XML::Node instances. This is a simple
16
+ * and powerful model, but has the major limitation that the size of
17
+ * the document that can be processed is limited by the amount of
18
+ * memory available.
19
+ *
20
+ * Using the html parser is simple:
21
+ *
22
+ * parser = XML::HTMLParser.file('my_file')
23
+ * doc = parser.parse
24
+ *
25
+ * You can also parse documents (see XML::HTMLParser.document),
26
+ * strings (see XML::HTMLParser.string) and io objects (see
27
+ * XML::HTMLParser.io).
28
+ */
29
+
30
+ VALUE cXMLHtmlParser;
31
+ static ID CONTEXT_ATTR;
32
+
33
+
34
+ /* call-seq:
35
+ * XML::HTMLParser.initialize -> parser
36
+ *
37
+ * Initializes a new parser instance with no pre-determined source.
38
+ */
39
+ static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
40
+ {
41
+ VALUE context = Qnil;
42
+
43
+ rb_scan_args(argc, argv, "01", &context);
44
+
45
+ if (context == Qnil)
46
+ {
47
+ rb_warn("Passing no parameters to XML::HTMLParser.new is deprecated. Pass an instance of XML::Parser::Context instead.");
48
+ context = rb_class_new_instance(0, NULL, cXMLParserContext);
49
+ }
50
+
51
+ rb_ivar_set(self, CONTEXT_ATTR, context);
52
+ return self;
53
+ }
54
+
55
+ /*
56
+ * call-seq:
57
+ * parser.parse -> XML::Document
58
+ *
59
+ * Parse the input XML and create an XML::Document with
60
+ * it's content. If an error occurs, XML::Parser::ParseError
61
+ * is thrown.
62
+ */
63
+ static VALUE rxml_html_parser_parse(VALUE self)
64
+ {
65
+ xmlParserCtxtPtr ctxt;
66
+ VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
67
+
68
+ Data_Get_Struct(context, xmlParserCtxt, ctxt);
69
+
70
+ if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
71
+ {
72
+ if (ctxt->myDoc)
73
+ xmlFreeDoc(ctxt->myDoc);
74
+ rxml_raise(&ctxt->lastError);
75
+ }
76
+
77
+ return rxml_document_wrap(ctxt->myDoc);
78
+ }
79
+
80
+ void rxml_init_html_parser(void)
81
+ {
82
+ CONTEXT_ATTR = rb_intern("@context");
83
+
84
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
85
+
86
+ /* Atributes */
87
+ rb_define_attr(cXMLHtmlParser, "input", 1, 0);
88
+
89
+ /* Instance methods */
90
+ rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
91
+ rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
92
+ }
@@ -0,0 +1,12 @@
1
+ /* $Id$ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_HTML_PARSER__
6
+ #define __RXML_HTML_PARSER__
7
+
8
+ extern VALUE cXMLHtmlParser;
9
+
10
+ void rxml_init_html_parser(void);
11
+
12
+ #endif
@@ -0,0 +1,308 @@
1
+ /* $Id: ruby_xml_parser_context.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+ #include "ruby_xml_html_parser_context.h"
7
+
8
+
9
+ /*
10
+ * Document-class: LibXML::XML::HTMLParser::Context
11
+ *
12
+ * The XML::HTMLParser::Context class provides in-depth control over how
13
+ * a document is parsed.
14
+ */
15
+
16
+ VALUE cXMLHtmlParserContext;
17
+ static ID IO_ATTR;
18
+
19
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
20
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
21
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
22
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
23
+ #if LIBXML_VERSION < 20627
24
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
25
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
26
+ {
27
+ htmlSAXHandler *sax;
28
+ if (ctxt == NULL) return(-1);
29
+
30
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
31
+ ctxt->dict = xmlDictCreate();
32
+ if (ctxt->dict == NULL) {
33
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
34
+ return(-1);
35
+ }
36
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
37
+ if (sax == NULL) {
38
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
39
+ return(-1);
40
+ }
41
+ else
42
+ memset(sax, 0, sizeof(htmlSAXHandler));
43
+
44
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
45
+ if (ctxt->inputTab == NULL) {
46
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
47
+ ctxt->inputNr = 0;
48
+ ctxt->inputMax = 0;
49
+ ctxt->input = NULL;
50
+ return(-1);
51
+ }
52
+ ctxt->inputNr = 0;
53
+ ctxt->inputMax = 5;
54
+ ctxt->input = NULL;
55
+ ctxt->version = NULL;
56
+ ctxt->encoding = NULL;
57
+ ctxt->standalone = -1;
58
+ ctxt->instate = XML_PARSER_START;
59
+
60
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
61
+ if (ctxt->nodeTab == NULL) {
62
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
63
+ ctxt->nodeNr = 0;
64
+ ctxt->nodeMax = 0;
65
+ ctxt->node = NULL;
66
+ ctxt->inputNr = 0;
67
+ ctxt->inputMax = 0;
68
+ ctxt->input = NULL;
69
+ return(-1);
70
+ }
71
+ ctxt->nodeNr = 0;
72
+ ctxt->nodeMax = 10;
73
+ ctxt->node = NULL;
74
+
75
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
76
+ if (ctxt->nameTab == NULL) {
77
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
78
+ ctxt->nameNr = 0;
79
+ ctxt->nameMax = 10;
80
+ ctxt->name = NULL;
81
+ ctxt->nodeNr = 0;
82
+ ctxt->nodeMax = 0;
83
+ ctxt->node = NULL;
84
+ ctxt->inputNr = 0;
85
+ ctxt->inputMax = 0;
86
+ ctxt->input = NULL;
87
+ return(-1);
88
+ }
89
+ ctxt->nameNr = 0;
90
+ ctxt->nameMax = 10;
91
+ ctxt->name = NULL;
92
+
93
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
94
+ else {
95
+ ctxt->sax = sax;
96
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
97
+ }
98
+ ctxt->userData = ctxt;
99
+ ctxt->myDoc = NULL;
100
+ ctxt->wellFormed = 1;
101
+ ctxt->replaceEntities = 0;
102
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
103
+ ctxt->html = 1;
104
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
105
+ ctxt->vctxt.userData = ctxt;
106
+ ctxt->vctxt.error = xmlParserValidityError;
107
+ ctxt->vctxt.warning = xmlParserValidityWarning;
108
+ ctxt->record_info = 0;
109
+ ctxt->validate = 0;
110
+ ctxt->nbChars = 0;
111
+ ctxt->checkIndex = 0;
112
+ ctxt->catalogs = NULL;
113
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
114
+ return(0);
115
+ }
116
+
117
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
118
+ {
119
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
120
+ if (ctxt == NULL) {
121
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
122
+ return(NULL);
123
+ }
124
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
125
+ if (htmlInitParserCtxt(ctxt) < 0) {
126
+ htmlFreeParserCtxt(ctxt);
127
+ return(NULL);
128
+ }
129
+ return(ctxt);
130
+ }
131
+ #endif
132
+
133
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
134
+ {
135
+ htmlFreeParserCtxt(ctxt);
136
+ }
137
+
138
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
139
+ {
140
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
141
+ }
142
+
143
+ /* call-seq:
144
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
145
+ *
146
+ * Creates a new parser context based on the specified file or uri.
147
+ *
148
+ * Parameters:
149
+ *
150
+ * file - A filename or uri.
151
+ */
152
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
153
+ {
154
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
155
+ if (!ctxt)
156
+ rxml_raise(&xmlLastError);
157
+
158
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
159
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
160
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
161
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
162
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
163
+
164
+ return rxml_html_parser_context_wrap(ctxt);
165
+ }
166
+
167
+ /* call-seq:
168
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
169
+ *
170
+ * Creates a new parser context based on the specified io object.
171
+ *
172
+ * Parameters:
173
+ *
174
+ * io - A ruby IO object.
175
+ */
176
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
177
+ {
178
+ VALUE result;
179
+ htmlParserCtxtPtr ctxt;
180
+ xmlParserInputBufferPtr input;
181
+ xmlParserInputPtr stream;
182
+
183
+ if (NIL_P(io))
184
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
185
+
186
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
187
+ (void*)io, XML_CHAR_ENCODING_NONE);
188
+
189
+ ctxt = htmlNewParserCtxt();
190
+ if (!ctxt)
191
+ {
192
+ xmlFreeParserInputBuffer(input);
193
+ rxml_raise(&xmlLastError);
194
+ }
195
+
196
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
197
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
198
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
199
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
200
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
201
+
202
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
203
+
204
+ if (!stream)
205
+ {
206
+ xmlFreeParserInputBuffer(input);
207
+ xmlFreeParserCtxt(ctxt);
208
+ rxml_raise(&xmlLastError);
209
+ }
210
+ inputPush(ctxt, stream);
211
+ result = rxml_html_parser_context_wrap(ctxt);
212
+
213
+ /* Attach io object to parser so it won't get freed.*/
214
+ rb_ivar_set(result, IO_ATTR, io);
215
+
216
+ return result;
217
+ }
218
+
219
+ /* call-seq:
220
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
221
+ *
222
+ * Creates a new parser context based on the specified string.
223
+ *
224
+ * Parameters:
225
+ *
226
+ * string - A string that contains the data to parse.
227
+ */
228
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
229
+ {
230
+ htmlParserCtxtPtr ctxt;
231
+ Check_Type(string, T_STRING);
232
+
233
+ if (RSTRING_LEN(string) == 0)
234
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
235
+
236
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
237
+ RSTRING_LEN(string));
238
+ if (!ctxt)
239
+ rxml_raise(&xmlLastError);
240
+
241
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
242
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
243
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
244
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
245
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
246
+
247
+ htmlDefaultSAXHandlerInit();
248
+ if (ctxt->sax != NULL)
249
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
250
+
251
+ return rxml_html_parser_context_wrap(ctxt);
252
+ }
253
+
254
+ /*
255
+ * call-seq:
256
+ * context.disable_cdata = (true|false)
257
+ *
258
+ * Control whether the CDATA nodes will be created in this context.
259
+ */
260
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE bool)
261
+ {
262
+ htmlParserCtxtPtr ctxt;
263
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
264
+
265
+ if (ctxt->sax == NULL)
266
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
267
+
268
+ /* LibXML controls this internally with the default SAX handler. */
269
+ if (bool)
270
+ ctxt->sax->cdataBlock = NULL;
271
+ else
272
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
273
+
274
+ return bool;
275
+ }
276
+
277
+ /*
278
+ * call-seq:
279
+ * context.options = XML::Parser::Options::NOENT |
280
+ XML::Parser::Options::NOCDATA
281
+ *
282
+ * Provides control over the execution of a parser. Valid values
283
+ * are the constants defined on XML::Parser::Options. Multiple
284
+ * options can be combined by using Bitwise OR (|).
285
+ */
286
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
287
+ {
288
+ int result;
289
+ htmlParserCtxtPtr ctxt;
290
+ Check_Type(options, T_FIXNUM);
291
+
292
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
293
+ result = htmlCtxtUseOptions(ctxt, NUM2INT(options));
294
+
295
+ return self;
296
+ }
297
+
298
+ void rxml_init_html_parser_context(void)
299
+ {
300
+ IO_ATTR = ID2SYM(rb_intern("@io"));
301
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
302
+
303
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
304
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
305
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
306
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
307
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
308
+ }
@@ -0,0 +1,12 @@
1
+ /* $Id: ruby_xml_parser_context.h 666 2008-12-07 00:16:50Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_HTML_PARSER_CONTEXT__
6
+ #define __RXML_HTML_PARSER_CONTEXT__
7
+
8
+ extern VALUE cXMLHtmlParserContext;
9
+
10
+ void rxml_init_html_parser_context(void);
11
+
12
+ #endif
@@ -0,0 +1,40 @@
1
+ /* $Id: ruby_xml_html_parser.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include "ruby_libxml.h"
6
+
7
+ /* Document-class: LibXML::XML::HTMLParser::Options
8
+ *
9
+ * Options to control the operation of the HTMLParser. The easiest
10
+ * way to set a parser's options is via the methods
11
+ * XML::HTMLParser.file, XML::HTMLParser.io or XML::HTMLParser.string.
12
+ * For additional control, see XML::HTMLParser::Context#options=.
13
+ */
14
+
15
+ VALUE mXMLHtmlParserOptions;
16
+
17
+ void rxml_init_html_parser_options(void)
18
+ {
19
+ mXMLHtmlParserOptions = rb_define_module_under(cXMLHtmlParser, "Options");
20
+
21
+
22
+ #if LIBXML_VERSION >= 20621
23
+ /* 1: Relax parsing. */
24
+ rb_define_const(mXMLHtmlParserOptions, "RECOVER", INT2NUM(HTML_PARSE_RECOVER));
25
+ #endif
26
+ /* 32: Suppress error reports. */
27
+ rb_define_const(mXMLHtmlParserOptions, "NOERROR", INT2NUM(HTML_PARSE_NOERROR));
28
+ /* 64: Suppress warning reports. */
29
+ rb_define_const(mXMLHtmlParserOptions, "NOWARNING", INT2NUM(HTML_PARSE_NOWARNING));
30
+ /* 128: Enable pedantic error reporting. */
31
+ rb_define_const(mXMLHtmlParserOptions, "PEDANTIC", INT2NUM(HTML_PARSE_PEDANTIC));
32
+ /* 256: Remove blank nodes. */
33
+ rb_define_const(mXMLHtmlParserOptions, "NOBLANKS", INT2NUM(HTML_PARSE_NOBLANKS));
34
+ #if LIBXML_VERSION >= 20621
35
+ /* 2048: Forbid network access. */
36
+ rb_define_const(mXMLHtmlParserOptions, "NONET", INT2NUM(HTML_PARSE_NONET));
37
+ /* 65536: Compact small text nodes. */
38
+ rb_define_const(mXMLHtmlParserOptions, "COMPACT", INT2NUM(HTML_PARSE_COMPACT));
39
+ #endif
40
+ }