libxml-ruby 3.2.2-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY +848 -0
  3. data/LICENSE +21 -0
  4. data/MANIFEST +166 -0
  5. data/README.rdoc +217 -0
  6. data/Rakefile +99 -0
  7. data/ext/libxml/extconf.rb +61 -0
  8. data/ext/libxml/libxml.c +80 -0
  9. data/ext/libxml/libxml_ruby.def +35 -0
  10. data/ext/libxml/ruby_libxml.h +67 -0
  11. data/ext/libxml/ruby_xml.c +933 -0
  12. data/ext/libxml/ruby_xml.h +10 -0
  13. data/ext/libxml/ruby_xml_attr.c +333 -0
  14. data/ext/libxml/ruby_xml_attr.h +12 -0
  15. data/ext/libxml/ruby_xml_attr_decl.c +153 -0
  16. data/ext/libxml/ruby_xml_attr_decl.h +11 -0
  17. data/ext/libxml/ruby_xml_attributes.c +275 -0
  18. data/ext/libxml/ruby_xml_attributes.h +15 -0
  19. data/ext/libxml/ruby_xml_cbg.c +85 -0
  20. data/ext/libxml/ruby_xml_document.c +1123 -0
  21. data/ext/libxml/ruby_xml_document.h +11 -0
  22. data/ext/libxml/ruby_xml_dtd.c +248 -0
  23. data/ext/libxml/ruby_xml_dtd.h +9 -0
  24. data/ext/libxml/ruby_xml_encoding.c +250 -0
  25. data/ext/libxml/ruby_xml_encoding.h +16 -0
  26. data/ext/libxml/ruby_xml_error.c +996 -0
  27. data/ext/libxml/ruby_xml_error.h +12 -0
  28. data/ext/libxml/ruby_xml_html_parser.c +89 -0
  29. data/ext/libxml/ruby_xml_html_parser.h +10 -0
  30. data/ext/libxml/ruby_xml_html_parser_context.c +337 -0
  31. data/ext/libxml/ruby_xml_html_parser_context.h +10 -0
  32. data/ext/libxml/ruby_xml_html_parser_options.c +46 -0
  33. data/ext/libxml/ruby_xml_html_parser_options.h +10 -0
  34. data/ext/libxml/ruby_xml_input_cbg.c +191 -0
  35. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  36. data/ext/libxml/ruby_xml_io.c +47 -0
  37. data/ext/libxml/ruby_xml_io.h +10 -0
  38. data/ext/libxml/ruby_xml_namespace.c +153 -0
  39. data/ext/libxml/ruby_xml_namespace.h +10 -0
  40. data/ext/libxml/ruby_xml_namespaces.c +293 -0
  41. data/ext/libxml/ruby_xml_namespaces.h +9 -0
  42. data/ext/libxml/ruby_xml_node.c +1402 -0
  43. data/ext/libxml/ruby_xml_node.h +13 -0
  44. data/ext/libxml/ruby_xml_parser.c +91 -0
  45. data/ext/libxml/ruby_xml_parser.h +12 -0
  46. data/ext/libxml/ruby_xml_parser_context.c +999 -0
  47. data/ext/libxml/ruby_xml_parser_context.h +10 -0
  48. data/ext/libxml/ruby_xml_parser_options.c +66 -0
  49. data/ext/libxml/ruby_xml_parser_options.h +12 -0
  50. data/ext/libxml/ruby_xml_reader.c +1239 -0
  51. data/ext/libxml/ruby_xml_reader.h +17 -0
  52. data/ext/libxml/ruby_xml_relaxng.c +110 -0
  53. data/ext/libxml/ruby_xml_relaxng.h +10 -0
  54. data/ext/libxml/ruby_xml_sax2_handler.c +326 -0
  55. data/ext/libxml/ruby_xml_sax2_handler.h +10 -0
  56. data/ext/libxml/ruby_xml_sax_parser.c +116 -0
  57. data/ext/libxml/ruby_xml_sax_parser.h +10 -0
  58. data/ext/libxml/ruby_xml_schema.c +278 -0
  59. data/ext/libxml/ruby_xml_schema.h +809 -0
  60. data/ext/libxml/ruby_xml_schema_attribute.c +109 -0
  61. data/ext/libxml/ruby_xml_schema_attribute.h +15 -0
  62. data/ext/libxml/ruby_xml_schema_element.c +95 -0
  63. data/ext/libxml/ruby_xml_schema_element.h +14 -0
  64. data/ext/libxml/ruby_xml_schema_facet.c +52 -0
  65. data/ext/libxml/ruby_xml_schema_facet.h +13 -0
  66. data/ext/libxml/ruby_xml_schema_type.c +232 -0
  67. data/ext/libxml/ruby_xml_schema_type.h +9 -0
  68. data/ext/libxml/ruby_xml_version.h +9 -0
  69. data/ext/libxml/ruby_xml_writer.c +1133 -0
  70. data/ext/libxml/ruby_xml_writer.h +10 -0
  71. data/ext/libxml/ruby_xml_xinclude.c +16 -0
  72. data/ext/libxml/ruby_xml_xinclude.h +11 -0
  73. data/ext/libxml/ruby_xml_xpath.c +194 -0
  74. data/ext/libxml/ruby_xml_xpath.h +13 -0
  75. data/ext/libxml/ruby_xml_xpath_context.c +360 -0
  76. data/ext/libxml/ruby_xml_xpath_context.h +9 -0
  77. data/ext/libxml/ruby_xml_xpath_expression.c +81 -0
  78. data/ext/libxml/ruby_xml_xpath_expression.h +10 -0
  79. data/ext/libxml/ruby_xml_xpath_object.c +338 -0
  80. data/ext/libxml/ruby_xml_xpath_object.h +17 -0
  81. data/ext/libxml/ruby_xml_xpointer.c +99 -0
  82. data/ext/libxml/ruby_xml_xpointer.h +11 -0
  83. data/ext/vc/libxml_ruby.sln +28 -0
  84. data/lib/3.1/libxml_ruby.so +0 -0
  85. data/lib/libxml/attr.rb +123 -0
  86. data/lib/libxml/attr_decl.rb +80 -0
  87. data/lib/libxml/attributes.rb +14 -0
  88. data/lib/libxml/document.rb +194 -0
  89. data/lib/libxml/error.rb +95 -0
  90. data/lib/libxml/hpricot.rb +78 -0
  91. data/lib/libxml/html_parser.rb +96 -0
  92. data/lib/libxml/namespace.rb +62 -0
  93. data/lib/libxml/namespaces.rb +38 -0
  94. data/lib/libxml/node.rb +323 -0
  95. data/lib/libxml/parser.rb +101 -0
  96. data/lib/libxml/sax_callbacks.rb +180 -0
  97. data/lib/libxml/sax_parser.rb +41 -0
  98. data/lib/libxml/schema/attribute.rb +19 -0
  99. data/lib/libxml/schema/element.rb +19 -0
  100. data/lib/libxml/schema/type.rb +21 -0
  101. data/lib/libxml/schema.rb +48 -0
  102. data/lib/libxml/tree.rb +29 -0
  103. data/lib/libxml-ruby.rb +30 -0
  104. data/lib/libxml.rb +5 -0
  105. data/lib/xml/libxml.rb +10 -0
  106. data/lib/xml.rb +14 -0
  107. data/libxml-ruby.gemspec +48 -0
  108. data/script/benchmark/depixelate +634 -0
  109. data/script/benchmark/hamlet.xml +9055 -0
  110. data/script/benchmark/parsecount +170 -0
  111. data/script/benchmark/sock_entries.xml +507 -0
  112. data/script/benchmark/throughput +41 -0
  113. data/script/test +6 -0
  114. data/setup.rb +1584 -0
  115. data/test/c14n/given/doc.dtd +1 -0
  116. data/test/c14n/given/example-1.xml +14 -0
  117. data/test/c14n/given/example-2.xml +11 -0
  118. data/test/c14n/given/example-3.xml +18 -0
  119. data/test/c14n/given/example-4.xml +9 -0
  120. data/test/c14n/given/example-5.xml +12 -0
  121. data/test/c14n/given/example-6.xml +2 -0
  122. data/test/c14n/given/example-7.xml +11 -0
  123. data/test/c14n/given/example-8.xml +11 -0
  124. data/test/c14n/given/example-8.xpath +10 -0
  125. data/test/c14n/given/world.txt +1 -0
  126. data/test/c14n/result/1-1-without-comments/example-1 +4 -0
  127. data/test/c14n/result/1-1-without-comments/example-2 +11 -0
  128. data/test/c14n/result/1-1-without-comments/example-3 +14 -0
  129. data/test/c14n/result/1-1-without-comments/example-4 +9 -0
  130. data/test/c14n/result/1-1-without-comments/example-5 +3 -0
  131. data/test/c14n/result/1-1-without-comments/example-6 +1 -0
  132. data/test/c14n/result/1-1-without-comments/example-7 +1 -0
  133. data/test/c14n/result/1-1-without-comments/example-8 +1 -0
  134. data/test/c14n/result/with-comments/example-1 +6 -0
  135. data/test/c14n/result/with-comments/example-2 +11 -0
  136. data/test/c14n/result/with-comments/example-3 +14 -0
  137. data/test/c14n/result/with-comments/example-4 +9 -0
  138. data/test/c14n/result/with-comments/example-5 +4 -0
  139. data/test/c14n/result/with-comments/example-6 +1 -0
  140. data/test/c14n/result/with-comments/example-7 +1 -0
  141. data/test/c14n/result/without-comments/example-1 +4 -0
  142. data/test/c14n/result/without-comments/example-2 +11 -0
  143. data/test/c14n/result/without-comments/example-3 +14 -0
  144. data/test/c14n/result/without-comments/example-4 +9 -0
  145. data/test/c14n/result/without-comments/example-5 +3 -0
  146. data/test/c14n/result/without-comments/example-6 +1 -0
  147. data/test/c14n/result/without-comments/example-7 +1 -0
  148. data/test/model/atom.xml +13 -0
  149. data/test/model/bands.iso-8859-1.xml +5 -0
  150. data/test/model/bands.utf-8.xml +5 -0
  151. data/test/model/bands.xml +5 -0
  152. data/test/model/books.xml +154 -0
  153. data/test/model/cwm_1_0.xml +11336 -0
  154. data/test/model/merge_bug_data.xml +58 -0
  155. data/test/model/ruby-lang.html +238 -0
  156. data/test/model/rubynet.xml +79 -0
  157. data/test/model/rubynet_project +1 -0
  158. data/test/model/shiporder.rnc +28 -0
  159. data/test/model/shiporder.rng +86 -0
  160. data/test/model/shiporder.xml +23 -0
  161. data/test/model/shiporder.xsd +40 -0
  162. data/test/model/soap.xml +27 -0
  163. data/test/model/xinclude.xml +5 -0
  164. data/test/test_attr.rb +181 -0
  165. data/test/test_attr_decl.rb +132 -0
  166. data/test/test_attributes.rb +136 -0
  167. data/test/test_canonicalize.rb +120 -0
  168. data/test/test_deprecated_require.rb +12 -0
  169. data/test/test_document.rb +132 -0
  170. data/test/test_document_write.rb +146 -0
  171. data/test/test_dtd.rb +129 -0
  172. data/test/test_encoding.rb +129 -0
  173. data/test/test_encoding_sax.rb +115 -0
  174. data/test/test_error.rb +178 -0
  175. data/test/test_helper.rb +9 -0
  176. data/test/test_html_parser.rb +162 -0
  177. data/test/test_html_parser_context.rb +23 -0
  178. data/test/test_namespace.rb +60 -0
  179. data/test/test_namespaces.rb +200 -0
  180. data/test/test_node.rb +237 -0
  181. data/test/test_node_cdata.rb +50 -0
  182. data/test/test_node_comment.rb +32 -0
  183. data/test/test_node_copy.rb +40 -0
  184. data/test/test_node_edit.rb +158 -0
  185. data/test/test_node_pi.rb +37 -0
  186. data/test/test_node_text.rb +69 -0
  187. data/test/test_node_write.rb +97 -0
  188. data/test/test_node_xlink.rb +28 -0
  189. data/test/test_parser.rb +324 -0
  190. data/test/test_parser_context.rb +198 -0
  191. data/test/test_properties.rb +38 -0
  192. data/test/test_reader.rb +364 -0
  193. data/test/test_relaxng.rb +53 -0
  194. data/test/test_sax_parser.rb +326 -0
  195. data/test/test_schema.rb +168 -0
  196. data/test/test_suite.rb +48 -0
  197. data/test/test_traversal.rb +152 -0
  198. data/test/test_writer.rb +468 -0
  199. data/test/test_xinclude.rb +20 -0
  200. data/test/test_xml.rb +263 -0
  201. data/test/test_xpath.rb +244 -0
  202. data/test/test_xpath_context.rb +88 -0
  203. data/test/test_xpath_expression.rb +37 -0
  204. data/test/test_xpointer.rb +72 -0
  205. metadata +325 -0
@@ -0,0 +1,12 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_ERROR__
4
+ #define __RXML_ERROR__
5
+
6
+ extern VALUE eXMLError;
7
+
8
+ void rxml_init_error();
9
+ VALUE rxml_error_wrap(xmlErrorPtr xerror);
10
+ NORETURN(void rxml_raise(xmlErrorPtr xerror));
11
+
12
+ #endif
@@ -0,0 +1,89 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+
5
+ /* Document-class: LibXML::XML::HTMLParser
6
+ *
7
+ * The HTML parser implements an HTML 4.0 non-verifying parser with an API
8
+ * compatible with the XML::Parser. In contrast with the XML::Parser,
9
+ * it can parse "real world" HTML, even if it is severely broken from a
10
+ * specification point of view.
11
+ *
12
+ * The HTML parser creates an in-memory document object
13
+ * that consists of any number of XML::Node instances. This is a simple
14
+ * and powerful model, but has the major limitation that the size of
15
+ * the document that can be processed is limited by the amount of
16
+ * memory available.
17
+ *
18
+ * Using the html parser is simple:
19
+ *
20
+ * parser = XML::HTMLParser.file('my_file')
21
+ * doc = parser.parse
22
+ *
23
+ * You can also parse documents (see XML::HTMLParser.document),
24
+ * strings (see XML::HTMLParser.string) and io objects (see
25
+ * XML::HTMLParser.io).
26
+ */
27
+
28
+ VALUE cXMLHtmlParser;
29
+ static ID CONTEXT_ATTR;
30
+
31
+
32
+ /* call-seq:
33
+ * XML::HTMLParser.initialize -> parser
34
+ *
35
+ * Initializes a new parser instance with no pre-determined source.
36
+ */
37
+ static VALUE rxml_html_parser_initialize(int argc, VALUE *argv, VALUE self)
38
+ {
39
+ VALUE context = Qnil;
40
+
41
+ rb_scan_args(argc, argv, "01", &context);
42
+
43
+ if (context == Qnil)
44
+ {
45
+ rb_raise(rb_eArgError, "An instance of a XML::Parser::Context must be passed to XML::HTMLParser.new");
46
+ }
47
+
48
+ rb_ivar_set(self, CONTEXT_ATTR, context);
49
+ return self;
50
+ }
51
+
52
+ /*
53
+ * call-seq:
54
+ * parser.parse -> XML::Document
55
+ *
56
+ * Parse the input XML and create an XML::Document with
57
+ * it's content. If an error occurs, XML::Parser::ParseError
58
+ * is thrown.
59
+ */
60
+ static VALUE rxml_html_parser_parse(VALUE self)
61
+ {
62
+ xmlParserCtxtPtr ctxt;
63
+ VALUE context = rb_ivar_get(self, CONTEXT_ATTR);
64
+
65
+ Data_Get_Struct(context, xmlParserCtxt, ctxt);
66
+
67
+ if (htmlParseDocument(ctxt) == -1 && ! ctxt->recovery)
68
+ {
69
+ rxml_raise(&ctxt->lastError);
70
+ }
71
+
72
+ rb_funcall(context, rb_intern("close"), 0);
73
+
74
+ return rxml_document_wrap(ctxt->myDoc);
75
+ }
76
+
77
+ void rxml_init_html_parser(void)
78
+ {
79
+ CONTEXT_ATTR = rb_intern("@context");
80
+
81
+ cXMLHtmlParser = rb_define_class_under(mXML, "HTMLParser", rb_cObject);
82
+
83
+ /* Atributes */
84
+ rb_define_attr(cXMLHtmlParser, "input", 1, 0);
85
+
86
+ /* Instance methods */
87
+ rb_define_method(cXMLHtmlParser, "initialize", rxml_html_parser_initialize, -1);
88
+ rb_define_method(cXMLHtmlParser, "parse", rxml_html_parser_parse, 0);
89
+ }
@@ -0,0 +1,10 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_HTML_PARSER__
4
+ #define __RXML_HTML_PARSER__
5
+
6
+ extern VALUE cXMLHtmlParser;
7
+
8
+ void rxml_init_html_parser(void);
9
+
10
+ #endif
@@ -0,0 +1,337 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+ #include "ruby_xml_html_parser_context.h"
5
+
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::HTMLParser::Context
9
+ *
10
+ * The XML::HTMLParser::Context class provides in-depth control over how
11
+ * a document is parsed.
12
+ */
13
+
14
+ VALUE cXMLHtmlParserContext;
15
+ static ID IO_ATTR;
16
+
17
+ /* OS X 10.5 ships with libxml2 version 2.6.16 which does not expose the
18
+ htmlNewParserCtxt (or htmlInitParserCtxt which it uses) method. htmlNewParserCtxt
19
+ wasn't added to the libxml2 header files until 2.6.27. So the next two
20
+ methods are simply copied from a newer version of libxml2 (2.7.2). */
21
+ #if LIBXML_VERSION < 20627
22
+ #define XML_CTXT_FINISH_DTD_0 0xabcd1234
23
+ static int htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
24
+ {
25
+ htmlSAXHandler *sax;
26
+ if (ctxt == NULL) return(-1);
27
+
28
+ memset(ctxt, 0, sizeof(htmlParserCtxt));
29
+ ctxt->dict = xmlDictCreate();
30
+ if (ctxt->dict == NULL) {
31
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
32
+ return(-1);
33
+ }
34
+ sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
35
+ if (sax == NULL) {
36
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
37
+ return(-1);
38
+ }
39
+ else
40
+ memset(sax, 0, sizeof(htmlSAXHandler));
41
+
42
+ ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr));
43
+ if (ctxt->inputTab == NULL) {
44
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
45
+ ctxt->inputNr = 0;
46
+ ctxt->inputMax = 0;
47
+ ctxt->input = NULL;
48
+ return(-1);
49
+ }
50
+ ctxt->inputNr = 0;
51
+ ctxt->inputMax = 5;
52
+ ctxt->input = NULL;
53
+ ctxt->version = NULL;
54
+ ctxt->encoding = NULL;
55
+ ctxt->standalone = -1;
56
+ ctxt->instate = XML_PARSER_START;
57
+
58
+ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
59
+ if (ctxt->nodeTab == NULL) {
60
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
61
+ ctxt->nodeNr = 0;
62
+ ctxt->nodeMax = 0;
63
+ ctxt->node = NULL;
64
+ ctxt->inputNr = 0;
65
+ ctxt->inputMax = 0;
66
+ ctxt->input = NULL;
67
+ return(-1);
68
+ }
69
+ ctxt->nodeNr = 0;
70
+ ctxt->nodeMax = 10;
71
+ ctxt->node = NULL;
72
+
73
+ ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
74
+ if (ctxt->nameTab == NULL) {
75
+ rb_raise(rb_eNoMemError, "htmlInitParserCtxt: out of memory\n");
76
+ ctxt->nameNr = 0;
77
+ ctxt->nameMax = 10;
78
+ ctxt->name = NULL;
79
+ ctxt->nodeNr = 0;
80
+ ctxt->nodeMax = 0;
81
+ ctxt->node = NULL;
82
+ ctxt->inputNr = 0;
83
+ ctxt->inputMax = 0;
84
+ ctxt->input = NULL;
85
+ return(-1);
86
+ }
87
+ ctxt->nameNr = 0;
88
+ ctxt->nameMax = 10;
89
+ ctxt->name = NULL;
90
+
91
+ if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
92
+ else {
93
+ ctxt->sax = sax;
94
+ memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
95
+ }
96
+ ctxt->userData = ctxt;
97
+ ctxt->myDoc = NULL;
98
+ ctxt->wellFormed = 1;
99
+ ctxt->replaceEntities = 0;
100
+ ctxt->linenumbers = xmlLineNumbersDefaultValue;
101
+ ctxt->html = 1;
102
+ ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
103
+ ctxt->vctxt.userData = ctxt;
104
+ ctxt->vctxt.error = xmlParserValidityError;
105
+ ctxt->vctxt.warning = xmlParserValidityWarning;
106
+ ctxt->record_info = 0;
107
+ ctxt->validate = 0;
108
+ ctxt->nbChars = 0;
109
+ ctxt->checkIndex = 0;
110
+ ctxt->catalogs = NULL;
111
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
112
+ return(0);
113
+ }
114
+
115
+ static htmlParserCtxtPtr htmlNewParserCtxt(void)
116
+ {
117
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
118
+ if (ctxt == NULL) {
119
+ rb_raise(rb_eNoMemError, "NewParserCtxt: out of memory\n");
120
+ return(NULL);
121
+ }
122
+ memset(ctxt, 0, sizeof(xmlParserCtxt));
123
+ if (htmlInitParserCtxt(ctxt) < 0) {
124
+ htmlFreeParserCtxt(ctxt);
125
+ return(NULL);
126
+ }
127
+ return(ctxt);
128
+ }
129
+ #endif
130
+
131
+ static void rxml_html_parser_context_free(htmlParserCtxtPtr ctxt)
132
+ {
133
+ htmlFreeParserCtxt(ctxt);
134
+ }
135
+
136
+ static VALUE rxml_html_parser_context_wrap(htmlParserCtxtPtr ctxt)
137
+ {
138
+ return Data_Wrap_Struct(cXMLHtmlParserContext, NULL, rxml_html_parser_context_free, ctxt);
139
+ }
140
+
141
+ /* call-seq:
142
+ * XML::HTMLParser::Context.file(file) -> XML::HTMLParser::Context
143
+ *
144
+ * Creates a new parser context based on the specified file or uri.
145
+ *
146
+ * Parameters:
147
+ *
148
+ * file - A filename or uri.
149
+ */
150
+ static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
151
+ {
152
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);
153
+ if (!ctxt)
154
+ rxml_raise(&xmlLastError);
155
+
156
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
157
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
158
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
159
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
160
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
161
+
162
+ return rxml_html_parser_context_wrap(ctxt);
163
+ }
164
+
165
+ /* call-seq:
166
+ * XML::HTMLParser::Context.io(io) -> XML::HTMLParser::Context
167
+ *
168
+ * Creates a new parser context based on the specified io object.
169
+ *
170
+ * Parameters:
171
+ *
172
+ * io - A ruby IO object.
173
+ */
174
+ static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
175
+ {
176
+ VALUE result;
177
+ htmlParserCtxtPtr ctxt;
178
+ xmlParserInputBufferPtr input;
179
+ xmlParserInputPtr stream;
180
+
181
+ if (NIL_P(io))
182
+ rb_raise(rb_eTypeError, "Must pass in an IO object");
183
+
184
+ input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
185
+ (void*)io, XML_CHAR_ENCODING_NONE);
186
+
187
+ ctxt = htmlNewParserCtxt();
188
+ if (!ctxt)
189
+ {
190
+ xmlFreeParserInputBuffer(input);
191
+ rxml_raise(&xmlLastError);
192
+ }
193
+
194
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
195
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
196
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
197
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
198
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
199
+
200
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
201
+
202
+ if (!stream)
203
+ {
204
+ xmlFreeParserInputBuffer(input);
205
+ xmlFreeParserCtxt(ctxt);
206
+ rxml_raise(&xmlLastError);
207
+ }
208
+ inputPush(ctxt, stream);
209
+ result = rxml_html_parser_context_wrap(ctxt);
210
+
211
+ /* Attach io object to parser so it won't get freed.*/
212
+ rb_ivar_set(result, IO_ATTR, io);
213
+
214
+ return result;
215
+ }
216
+
217
+ /* call-seq:
218
+ * XML::HTMLParser::Context.string(string) -> XML::HTMLParser::Context
219
+ *
220
+ * Creates a new parser context based on the specified string.
221
+ *
222
+ * Parameters:
223
+ *
224
+ * string - A string that contains the data to parse.
225
+ */
226
+ static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
227
+ {
228
+ htmlParserCtxtPtr ctxt;
229
+ Check_Type(string, T_STRING);
230
+
231
+ if (RSTRING_LEN(string) == 0)
232
+ rb_raise(rb_eArgError, "Must specify a string with one or more characters");
233
+
234
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string),
235
+ (int)RSTRING_LEN(string));
236
+ if (!ctxt)
237
+ rxml_raise(&xmlLastError);
238
+
239
+ /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
240
+ xmlCtxtUseOptionsInternal (called below) initialize slightly different
241
+ context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
242
+ sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
243
+ htmlCtxtUseOptions(ctxt, rxml_libxml_default_options());
244
+
245
+ htmlDefaultSAXHandlerInit();
246
+ if (ctxt->sax != NULL)
247
+ memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
248
+
249
+ return rxml_html_parser_context_wrap(ctxt);
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * context.close -> nil
255
+ *
256
+ * Closes the underlying input streams. This is useful when parsing a large amount of
257
+ * files and you want to close the files without relying on Ruby's garbage collector
258
+ * to run.
259
+ */
260
+ static VALUE rxml_html_parser_context_close(VALUE self)
261
+ {
262
+ htmlParserCtxtPtr ctxt;
263
+ xmlParserInputPtr xinput;
264
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
265
+
266
+ while ((xinput = inputPop(ctxt)) != NULL)
267
+ {
268
+ xmlFreeInputStream(xinput);
269
+ }
270
+ return Qnil;
271
+ }
272
+
273
+ /*
274
+ * call-seq:
275
+ * context.disable_cdata = (true|false)
276
+ *
277
+ * Control whether the CDATA nodes will be created in this context.
278
+ */
279
+ static VALUE rxml_html_parser_context_disable_cdata_set(VALUE self, VALUE value)
280
+ {
281
+ htmlParserCtxtPtr ctxt;
282
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
283
+
284
+ if (ctxt->sax == NULL)
285
+ rb_raise(rb_eRuntimeError, "Sax handler is not yet set");
286
+
287
+ /* LibXML controls this internally with the default SAX handler. */
288
+ if (value)
289
+ ctxt->sax->cdataBlock = NULL;
290
+ else
291
+ ctxt->sax->cdataBlock = htmlDefaultSAXHandler.cdataBlock;
292
+
293
+ return value;
294
+ }
295
+
296
+ /*
297
+ * call-seq:
298
+ * context.options = XML::Parser::Options::NOENT |
299
+ XML::Parser::Options::NOCDATA
300
+ *
301
+ * Provides control over the execution of a parser. Valid values
302
+ * are the constants defined on XML::Parser::Options. Multiple
303
+ * options can be combined by using Bitwise OR (|).
304
+ */
305
+ static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
306
+ {
307
+ int xml_options = NUM2INT(options);
308
+ htmlParserCtxtPtr ctxt;
309
+ Check_Type(options, T_FIXNUM);
310
+
311
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
312
+ htmlCtxtUseOptions(ctxt, xml_options);
313
+
314
+ #if LIBXML_VERSION >= 20707
315
+ /* Big hack here, but htmlCtxtUseOptions doens't support HTML_PARSE_NOIMPLIED.
316
+ So do it ourselves. There must be a better way??? */
317
+ if (xml_options & HTML_PARSE_NOIMPLIED)
318
+ {
319
+ ctxt->options |= HTML_PARSE_NOIMPLIED;
320
+ }
321
+ #endif
322
+
323
+ return self;
324
+ }
325
+
326
+ void rxml_init_html_parser_context(void)
327
+ {
328
+ IO_ATTR = ID2SYM(rb_intern("@io"));
329
+ cXMLHtmlParserContext = rb_define_class_under(cXMLHtmlParser, "Context", cXMLParserContext);
330
+
331
+ rb_define_singleton_method(cXMLHtmlParserContext, "file", rxml_html_parser_context_file, 1);
332
+ rb_define_singleton_method(cXMLHtmlParserContext, "io", rxml_html_parser_context_io, 1);
333
+ rb_define_singleton_method(cXMLHtmlParserContext, "string", rxml_html_parser_context_string, 1);
334
+ rb_define_method(cXMLHtmlParserContext, "close", rxml_html_parser_context_close, 0);
335
+ rb_define_method(cXMLHtmlParserContext, "disable_cdata=", rxml_html_parser_context_disable_cdata_set, 1);
336
+ rb_define_method(cXMLHtmlParserContext, "options=", rxml_html_parser_context_options_set, 1);
337
+ }
@@ -0,0 +1,10 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_HTML_PARSER_CONTEXT__
4
+ #define __RXML_HTML_PARSER_CONTEXT__
5
+
6
+ extern VALUE cXMLHtmlParserContext;
7
+
8
+ void rxml_init_html_parser_context(void);
9
+
10
+ #endif
@@ -0,0 +1,46 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include "ruby_libxml.h"
4
+
5
+ /* Document-class: LibXML::XML::HTMLParser::Options
6
+ *
7
+ * Options to control the operation of the HTMLParser. The easiest
8
+ * way to set a parser's options is via the methods
9
+ * XML::HTMLParser.file, XML::HTMLParser.io or XML::HTMLParser.string.
10
+ * For additional control, see XML::HTMLParser::Context#options=.
11
+ */
12
+
13
+ VALUE mXMLHtmlParserOptions;
14
+
15
+ void rxml_init_html_parser_options(void)
16
+ {
17
+ mXMLHtmlParserOptions = rb_define_module_under(cXMLHtmlParser, "Options");
18
+
19
+
20
+ #if LIBXML_VERSION >= 20621
21
+ /* 1: Relax parsing. */
22
+ rb_define_const(mXMLHtmlParserOptions, "RECOVER", INT2NUM(HTML_PARSE_RECOVER));
23
+ #endif
24
+ #if LIBXML_VERSION >= 20708
25
+ /* 2: Do not default a doctype if not found */
26
+ rb_define_const(mXMLHtmlParserOptions, "NODEFDTD", INT2NUM(HTML_PARSE_NODEFDTD));
27
+ #endif
28
+ /* 32: Suppress error reports. */
29
+ rb_define_const(mXMLHtmlParserOptions, "NOERROR", INT2NUM(HTML_PARSE_NOERROR));
30
+ /* 64: Suppress warning reports. */
31
+ rb_define_const(mXMLHtmlParserOptions, "NOWARNING", INT2NUM(HTML_PARSE_NOWARNING));
32
+ /* 128: Enable pedantic error reporting. */
33
+ rb_define_const(mXMLHtmlParserOptions, "PEDANTIC", INT2NUM(HTML_PARSE_PEDANTIC));
34
+ /* 256: Remove blank nodes. */
35
+ rb_define_const(mXMLHtmlParserOptions, "NOBLANKS", INT2NUM(HTML_PARSE_NOBLANKS));
36
+ #if LIBXML_VERSION >= 20621
37
+ /* 2048: Forbid network access. */
38
+ rb_define_const(mXMLHtmlParserOptions, "NONET", INT2NUM(HTML_PARSE_NONET));
39
+ /* 65536: Compact small text nodes. */
40
+ rb_define_const(mXMLHtmlParserOptions, "COMPACT", INT2NUM(HTML_PARSE_COMPACT));
41
+ #endif
42
+ #if LIBXML_VERSION >= 20707
43
+ /* 8192: Do not add implied html/body... elements */
44
+ rb_define_const(mXMLHtmlParserOptions, "NOIMPLIED", INT2NUM(HTML_PARSE_NOIMPLIED));
45
+ #endif
46
+ }
@@ -0,0 +1,10 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_HTML_PARSER_OPTIONS__
4
+ #define __RXML_HTML_PARSER_OPTIONS__
5
+
6
+ extern VALUE mXMLHtmlParserOptions;
7
+
8
+ void rxml_init_html_parser_options(void);
9
+
10
+ #endif
@@ -0,0 +1,191 @@
1
+ /* Author: Martin Povolny (xpovolny@fi.muni.cz) */
2
+
3
+ #include "ruby_libxml.h"
4
+ #include "ruby_xml_input_cbg.h"
5
+
6
+ /* Document-class: LibXML::XML::InputCallbacks
7
+ *
8
+ * Support for adding custom scheme handlers. */
9
+
10
+ static ic_scheme *first_scheme = 0;
11
+
12
+ int ic_match(char const *filename)
13
+ {
14
+ ic_scheme *scheme;
15
+
16
+ //fprintf( stderr, "ic_match: %s\n", filename );
17
+
18
+ scheme = first_scheme;
19
+ while (0 != scheme)
20
+ {
21
+ if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST scheme->scheme_name, scheme->name_len))
22
+ {
23
+ return 1;
24
+ }
25
+ scheme = scheme->next_scheme;
26
+ }
27
+ return 0;
28
+ }
29
+
30
+ void* ic_open(char const *filename)
31
+ {
32
+ ic_doc_context *ic_doc;
33
+ ic_scheme *scheme;
34
+ VALUE res;
35
+
36
+ scheme = first_scheme;
37
+ while (0 != scheme)
38
+ {
39
+ if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST scheme->scheme_name, scheme->name_len))
40
+ {
41
+ ic_doc = (ic_doc_context*) malloc(sizeof(ic_doc_context));
42
+
43
+ res = rb_funcall(scheme->class, rb_intern("document_query"), 1,
44
+ rb_str_new2(filename));
45
+
46
+ ic_doc->buffer = strdup(StringValuePtr(res));
47
+
48
+ ic_doc->bpos = ic_doc->buffer;
49
+ ic_doc->remaining = (int)strlen(ic_doc->buffer);
50
+ return ic_doc;
51
+ }
52
+ scheme = scheme->next_scheme;
53
+ }
54
+ return 0;
55
+ }
56
+
57
+ int ic_read(void *context, char *buffer, int len)
58
+ {
59
+ ic_doc_context *ic_doc;
60
+ int ret_len;
61
+ ic_doc = (ic_doc_context*) context;
62
+
63
+ if (len >= ic_doc->remaining)
64
+ {
65
+ ret_len = ic_doc->remaining;
66
+ }
67
+ else
68
+ {
69
+ ret_len = len;
70
+ }
71
+ ic_doc->remaining -= ret_len;
72
+ strncpy(buffer, ic_doc->bpos, ret_len);
73
+ ic_doc->bpos += ret_len;
74
+
75
+ return ret_len;
76
+ }
77
+
78
+ int ic_close(void *context)
79
+ {
80
+ ruby_xfree(((ic_doc_context*) context)->buffer);
81
+ ruby_xfree(context);
82
+ return 1;
83
+ }
84
+
85
+ /*
86
+ * call-seq:
87
+ * register
88
+ *
89
+ * Register a new set of I/O callback for handling parser input.
90
+ */
91
+ static VALUE input_callbacks_register_input_callbacks()
92
+ {
93
+ xmlRegisterInputCallbacks(ic_match, ic_open, ic_read, ic_close);
94
+ return (Qtrue);
95
+ }
96
+
97
+ /*
98
+ * call-seq:
99
+ * add_scheme
100
+ *
101
+ * No documentation available.
102
+ */
103
+ static VALUE input_callbacks_add_scheme(VALUE self, VALUE scheme_name,
104
+ VALUE class)
105
+ {
106
+ ic_scheme *scheme;
107
+
108
+ Check_Type(scheme_name, T_STRING);
109
+
110
+ scheme = (ic_scheme*) malloc(sizeof(ic_scheme));
111
+ scheme->next_scheme = 0;
112
+ scheme->scheme_name = strdup(StringValuePtr(scheme_name)); /* TODO alloc, dealloc */
113
+ scheme->name_len = (int)strlen(scheme->scheme_name);
114
+ scheme->class = class; /* TODO alloc, dealloc */
115
+
116
+ //fprintf( stderr, "registered: %s, %d, %s\n", scheme->scheme_name, scheme->name_len, scheme->class );
117
+
118
+ if (0 == first_scheme)
119
+ first_scheme = scheme;
120
+ else
121
+ {
122
+ ic_scheme *pos;
123
+ pos = first_scheme;
124
+ while (0 != pos->next_scheme)
125
+ pos = pos->next_scheme;
126
+ pos->next_scheme = scheme;
127
+ }
128
+
129
+ return (Qtrue);
130
+ }
131
+
132
+ /*
133
+ * call-seq:
134
+ * remove_scheme
135
+ *
136
+ * No documentation available.
137
+ */
138
+ static VALUE input_callbacks_remove_scheme(VALUE self, VALUE scheme_name)
139
+ {
140
+ char *name;
141
+ ic_scheme *save_scheme, *scheme;
142
+
143
+ Check_Type(scheme_name, T_STRING);
144
+ name = StringValuePtr(scheme_name);
145
+
146
+ if (0 == first_scheme)
147
+ return Qfalse;
148
+
149
+ if (!strncmp(name, first_scheme->scheme_name, first_scheme->name_len))
150
+ {
151
+ save_scheme = first_scheme->next_scheme;
152
+
153
+ ruby_xfree(first_scheme->scheme_name);
154
+ ruby_xfree(first_scheme);
155
+
156
+ first_scheme = save_scheme;
157
+ return Qtrue;
158
+ }
159
+
160
+ scheme = first_scheme;
161
+ while (0 != scheme->next_scheme)
162
+ {
163
+ if (!strncmp(name, scheme->next_scheme->scheme_name,
164
+ scheme->next_scheme->name_len))
165
+ {
166
+ save_scheme = scheme->next_scheme->next_scheme;
167
+
168
+ ruby_xfree(scheme->next_scheme->scheme_name);
169
+ ruby_xfree(scheme->next_scheme);
170
+
171
+ scheme->next_scheme = save_scheme;
172
+ return Qtrue;
173
+ }
174
+ scheme = scheme->next_scheme;
175
+ }
176
+ return Qfalse;
177
+ }
178
+
179
+ void rxml_init_input_callbacks(void)
180
+ {
181
+ VALUE cInputCallbacks;
182
+ cInputCallbacks = rb_define_class_under(mXML, "InputCallbacks", rb_cObject);
183
+
184
+ /* Class Methods */
185
+ rb_define_singleton_method(cInputCallbacks, "register",
186
+ input_callbacks_register_input_callbacks, 0);
187
+ rb_define_singleton_method(cInputCallbacks, "add_scheme",
188
+ input_callbacks_add_scheme, 2);
189
+ rb_define_singleton_method(cInputCallbacks, "remove_scheme",
190
+ input_callbacks_remove_scheme, 1);
191
+ }