nokogiri 1.11.0.rc4-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218)
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1682 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +477 -0
  8. data/ext/nokogiri/extconf.rb +819 -0
  9. data/ext/nokogiri/html_document.c +171 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/include/libexslt/exslt.h +102 -0
  20. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  21. data/ext/nokogiri/include/libexslt/exsltexports.h +140 -0
  22. data/ext/nokogiri/include/libxml2/libxml/DOCBparser.h +96 -0
  23. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  24. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  25. data/ext/nokogiri/include/libxml2/libxml/SAX.h +173 -0
  26. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +178 -0
  27. data/ext/nokogiri/include/libxml2/libxml/c14n.h +126 -0
  28. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  29. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  30. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  31. data/ext/nokogiri/include/libxml2/libxml/dict.h +79 -0
  32. data/ext/nokogiri/include/libxml2/libxml/encoding.h +245 -0
  33. data/ext/nokogiri/include/libxml2/libxml/entities.h +151 -0
  34. data/ext/nokogiri/include/libxml2/libxml/globals.h +508 -0
  35. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  36. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  37. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +163 -0
  38. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  39. data/ext/nokogiri/include/libxml2/libxml/parser.h +1241 -0
  40. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +644 -0
  41. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  42. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +217 -0
  43. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  44. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  45. data/ext/nokogiri/include/libxml2/libxml/threads.h +89 -0
  46. data/ext/nokogiri/include/libxml2/libxml/tree.h +1311 -0
  47. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  48. data/ext/nokogiri/include/libxml2/libxml/valid.h +458 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +366 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +945 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +153 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +224 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +151 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +485 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  66. data/ext/nokogiri/include/libxml2/libxml/xpath.h +566 -0
  67. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  68. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +114 -0
  69. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  70. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  71. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  72. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  73. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  74. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  75. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  76. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  77. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  78. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  79. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  80. data/ext/nokogiri/include/libxslt/security.h +104 -0
  81. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  82. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  83. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  84. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  85. data/ext/nokogiri/include/libxslt/xsltInternals.h +1978 -0
  86. data/ext/nokogiri/include/libxslt/xsltconfig.h +180 -0
  87. data/ext/nokogiri/include/libxslt/xsltexports.h +142 -0
  88. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  89. data/ext/nokogiri/include/libxslt/xsltutils.h +313 -0
  90. data/ext/nokogiri/nokogiri.c +135 -0
  91. data/ext/nokogiri/nokogiri.h +130 -0
  92. data/ext/nokogiri/xml_attr.c +103 -0
  93. data/ext/nokogiri/xml_attr.h +9 -0
  94. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  95. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  96. data/ext/nokogiri/xml_cdata.c +62 -0
  97. data/ext/nokogiri/xml_cdata.h +9 -0
  98. data/ext/nokogiri/xml_comment.c +69 -0
  99. data/ext/nokogiri/xml_comment.h +9 -0
  100. data/ext/nokogiri/xml_document.c +622 -0
  101. data/ext/nokogiri/xml_document.h +23 -0
  102. data/ext/nokogiri/xml_document_fragment.c +48 -0
  103. data/ext/nokogiri/xml_document_fragment.h +10 -0
  104. data/ext/nokogiri/xml_dtd.c +202 -0
  105. data/ext/nokogiri/xml_dtd.h +10 -0
  106. data/ext/nokogiri/xml_element_content.c +123 -0
  107. data/ext/nokogiri/xml_element_content.h +10 -0
  108. data/ext/nokogiri/xml_element_decl.c +69 -0
  109. data/ext/nokogiri/xml_element_decl.h +9 -0
  110. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  111. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  112. data/ext/nokogiri/xml_entity_decl.c +110 -0
  113. data/ext/nokogiri/xml_entity_decl.h +10 -0
  114. data/ext/nokogiri/xml_entity_reference.c +52 -0
  115. data/ext/nokogiri/xml_entity_reference.h +9 -0
  116. data/ext/nokogiri/xml_io.c +63 -0
  117. data/ext/nokogiri/xml_io.h +11 -0
  118. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  119. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  120. data/ext/nokogiri/xml_namespace.c +111 -0
  121. data/ext/nokogiri/xml_namespace.h +14 -0
  122. data/ext/nokogiri/xml_node.c +1773 -0
  123. data/ext/nokogiri/xml_node.h +13 -0
  124. data/ext/nokogiri/xml_node_set.c +486 -0
  125. data/ext/nokogiri/xml_node_set.h +12 -0
  126. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  127. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  128. data/ext/nokogiri/xml_reader.c +657 -0
  129. data/ext/nokogiri/xml_reader.h +10 -0
  130. data/ext/nokogiri/xml_relax_ng.c +179 -0
  131. data/ext/nokogiri/xml_relax_ng.h +9 -0
  132. data/ext/nokogiri/xml_sax_parser.c +305 -0
  133. data/ext/nokogiri/xml_sax_parser.h +39 -0
  134. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  135. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  136. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  137. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  138. data/ext/nokogiri/xml_schema.c +276 -0
  139. data/ext/nokogiri/xml_schema.h +9 -0
  140. data/ext/nokogiri/xml_syntax_error.c +64 -0
  141. data/ext/nokogiri/xml_syntax_error.h +13 -0
  142. data/ext/nokogiri/xml_text.c +52 -0
  143. data/ext/nokogiri/xml_text.h +9 -0
  144. data/ext/nokogiri/xml_xpath_context.c +374 -0
  145. data/ext/nokogiri/xml_xpath_context.h +10 -0
  146. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  147. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  148. data/lib/nokogiri.rb +127 -0
  149. data/lib/nokogiri/2.5/nokogiri.bundle +0 -0
  150. data/lib/nokogiri/2.6/nokogiri.bundle +0 -0
  151. data/lib/nokogiri/2.7/nokogiri.bundle +0 -0
  152. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  153. data/lib/nokogiri/css.rb +28 -0
  154. data/lib/nokogiri/css/node.rb +53 -0
  155. data/lib/nokogiri/css/parser.rb +751 -0
  156. data/lib/nokogiri/css/parser.y +272 -0
  157. data/lib/nokogiri/css/parser_extras.rb +94 -0
  158. data/lib/nokogiri/css/syntax_error.rb +8 -0
  159. data/lib/nokogiri/css/tokenizer.rb +154 -0
  160. data/lib/nokogiri/css/tokenizer.rex +55 -0
  161. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  162. data/lib/nokogiri/decorators/slop.rb +43 -0
  163. data/lib/nokogiri/html.rb +38 -0
  164. data/lib/nokogiri/html/builder.rb +36 -0
  165. data/lib/nokogiri/html/document.rb +322 -0
  166. data/lib/nokogiri/html/document_fragment.rb +50 -0
  167. data/lib/nokogiri/html/element_description.rb +24 -0
  168. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  169. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  170. data/lib/nokogiri/html/sax/parser.rb +63 -0
  171. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  172. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  173. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  174. data/lib/nokogiri/syntax_error.rb +5 -0
  175. data/lib/nokogiri/version.rb +3 -0
  176. data/lib/nokogiri/version/constant.rb +5 -0
  177. data/lib/nokogiri/version/info.rb +182 -0
  178. data/lib/nokogiri/xml.rb +76 -0
  179. data/lib/nokogiri/xml/attr.rb +15 -0
  180. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  181. data/lib/nokogiri/xml/builder.rb +447 -0
  182. data/lib/nokogiri/xml/cdata.rb +12 -0
  183. data/lib/nokogiri/xml/character_data.rb +8 -0
  184. data/lib/nokogiri/xml/document.rb +290 -0
  185. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  186. data/lib/nokogiri/xml/dtd.rb +33 -0
  187. data/lib/nokogiri/xml/element_content.rb +37 -0
  188. data/lib/nokogiri/xml/element_decl.rb +14 -0
  189. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  190. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  191. data/lib/nokogiri/xml/namespace.rb +14 -0
  192. data/lib/nokogiri/xml/node.rb +1240 -0
  193. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  194. data/lib/nokogiri/xml/node_set.rb +372 -0
  195. data/lib/nokogiri/xml/notation.rb +7 -0
  196. data/lib/nokogiri/xml/parse_options.rb +127 -0
  197. data/lib/nokogiri/xml/pp.rb +3 -0
  198. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  199. data/lib/nokogiri/xml/pp/node.rb +57 -0
  200. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  201. data/lib/nokogiri/xml/reader.rb +116 -0
  202. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  203. data/lib/nokogiri/xml/sax.rb +5 -0
  204. data/lib/nokogiri/xml/sax/document.rb +172 -0
  205. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  206. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  207. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  208. data/lib/nokogiri/xml/schema.rb +72 -0
  209. data/lib/nokogiri/xml/searchable.rb +239 -0
  210. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  211. data/lib/nokogiri/xml/text.rb +10 -0
  212. data/lib/nokogiri/xml/xpath.rb +11 -0
  213. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  214. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  215. data/lib/nokogiri/xslt.rb +57 -0
  216. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  217. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  218. metadata +565 -0
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_READER
2
+ #define NOKOGIRI_XML_READER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_reader();
7
+
8
+ extern VALUE cNokogiriXmlReader;
9
+
10
+ #endif
@@ -0,0 +1,179 @@
1
+ #include <xml_relax_ng.h>
2
+
3
+ static void dealloc(xmlRelaxNGPtr schema)
4
+ {
5
+ NOKOGIRI_DEBUG_START(schema);
6
+ xmlRelaxNGFree(schema);
7
+ NOKOGIRI_DEBUG_END(schema);
8
+ }
9
+
10
+ /*
11
+ * call-seq:
12
+ * validate_document(document)
13
+ *
14
+ * Validate a Nokogiri::XML::Document against this RelaxNG schema.
15
+ */
16
+ static VALUE validate_document(VALUE self, VALUE document)
17
+ {
18
+ xmlDocPtr doc;
19
+ xmlRelaxNGPtr schema;
20
+ VALUE errors;
21
+ xmlRelaxNGValidCtxtPtr valid_ctxt;
22
+
23
+ Data_Get_Struct(self, xmlRelaxNG, schema);
24
+ Data_Get_Struct(document, xmlDoc, doc);
25
+
26
+ errors = rb_ary_new();
27
+
28
+ valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
29
+
30
+ if(NULL == valid_ctxt) {
31
+ /* we have a problem */
32
+ rb_raise(rb_eRuntimeError, "Could not create a validation context");
33
+ }
34
+
35
+ #ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
36
+ xmlRelaxNGSetValidStructuredErrors(
37
+ valid_ctxt,
38
+ Nokogiri_error_array_pusher,
39
+ (void *)errors
40
+ );
41
+ #endif
42
+
43
+ xmlRelaxNGValidateDoc(valid_ctxt, doc);
44
+
45
+ xmlRelaxNGFreeValidCtxt(valid_ctxt);
46
+
47
+ return errors;
48
+ }
49
+
50
+ /*
51
+ * call-seq:
52
+ * read_memory(string)
53
+ *
54
+ * Create a new RelaxNG from the contents of +string+
55
+ */
56
+ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
57
+ {
58
+ VALUE content;
59
+ VALUE parse_options;
60
+ xmlRelaxNGParserCtxtPtr ctx;
61
+ xmlRelaxNGPtr schema;
62
+ VALUE errors;
63
+ VALUE rb_schema;
64
+ int scanned_args = 0;
65
+
66
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
67
+ if (scanned_args == 1) {
68
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
69
+ }
70
+
71
+ ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
72
+
73
+ errors = rb_ary_new();
74
+ xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
75
+
76
+ #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
77
+ xmlRelaxNGSetParserStructuredErrors(
78
+ ctx,
79
+ Nokogiri_error_array_pusher,
80
+ (void *)errors
81
+ );
82
+ #endif
83
+
84
+ schema = xmlRelaxNGParse(ctx);
85
+
86
+ xmlSetStructuredErrorFunc(NULL, NULL);
87
+ xmlRelaxNGFreeParserCtxt(ctx);
88
+
89
+ if(NULL == schema) {
90
+ xmlErrorPtr error = xmlGetLastError();
91
+ if(error)
92
+ Nokogiri_error_raise(NULL, error);
93
+ else
94
+ rb_raise(rb_eRuntimeError, "Could not parse document");
95
+
96
+ return Qnil;
97
+ }
98
+
99
+ rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
100
+ rb_iv_set(rb_schema, "@errors", errors);
101
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
102
+
103
+ return rb_schema;
104
+ }
105
+
106
+ /*
107
+ * call-seq:
108
+ * from_document(doc)
109
+ *
110
+ * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
111
+ */
112
+ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
113
+ {
114
+ VALUE document;
115
+ VALUE parse_options;
116
+ xmlDocPtr doc;
117
+ xmlRelaxNGParserCtxtPtr ctx;
118
+ xmlRelaxNGPtr schema;
119
+ VALUE errors;
120
+ VALUE rb_schema;
121
+ int scanned_args = 0;
122
+
123
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
124
+
125
+ Data_Get_Struct(document, xmlDoc, doc);
126
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
127
+
128
+ if (scanned_args == 1) {
129
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
130
+ }
131
+
132
+ ctx = xmlRelaxNGNewDocParserCtxt(doc);
133
+
134
+ errors = rb_ary_new();
135
+ xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
136
+
137
+ #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
138
+ xmlRelaxNGSetParserStructuredErrors(
139
+ ctx,
140
+ Nokogiri_error_array_pusher,
141
+ (void *)errors
142
+ );
143
+ #endif
144
+
145
+ schema = xmlRelaxNGParse(ctx);
146
+
147
+ xmlSetStructuredErrorFunc(NULL, NULL);
148
+ xmlRelaxNGFreeParserCtxt(ctx);
149
+
150
+ if(NULL == schema) {
151
+ xmlErrorPtr error = xmlGetLastError();
152
+ if(error)
153
+ Nokogiri_error_raise(NULL, error);
154
+ else
155
+ rb_raise(rb_eRuntimeError, "Could not parse document");
156
+
157
+ return Qnil;
158
+ }
159
+
160
+ rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
161
+ rb_iv_set(rb_schema, "@errors", errors);
162
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
163
+
164
+ return rb_schema;
165
+ }
166
+
167
+ VALUE cNokogiriXmlRelaxNG;
168
+ void init_xml_relax_ng()
169
+ {
170
+ VALUE nokogiri = rb_define_module("Nokogiri");
171
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
172
+ VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
173
+
174
+ cNokogiriXmlRelaxNG = klass;
175
+
176
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
177
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
178
+ rb_define_private_method(klass, "validate_document", validate_document, 1);
179
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_RELAX_NG
2
+ #define NOKOGIRI_XML_RELAX_NG
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_relax_ng();
7
+
8
+ extern VALUE cNokogiriXmlRelaxNG;
9
+ #endif
@@ -0,0 +1,305 @@
1
+ #include <xml_sax_parser.h>
2
+
3
+ static ID id_start_document, id_end_document, id_start_element, id_end_element;
4
+ static ID id_start_element_namespace, id_end_element_namespace;
5
+ static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
6
+ static ID id_cdata_block, id_cAttribute;
7
+ static ID id_processing_instruction;
8
+
9
+ static void start_document(void * ctx)
10
+ {
11
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
12
+ VALUE doc = rb_iv_get(self, "@document");
13
+
14
+ xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
15
+
16
+ if(NULL != ctxt && ctxt->html != 1) {
17
+ if(ctxt->standalone != -1) { /* -1 means there was no declaration */
18
+ VALUE encoding = Qnil ;
19
+ VALUE standalone = Qnil;
20
+ VALUE version;
21
+ if (ctxt->encoding) {
22
+ encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
23
+ } else if (ctxt->input && ctxt->input->encoding) {
24
+ encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
25
+ }
26
+
27
+ version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
28
+
29
+ switch(ctxt->standalone)
30
+ {
31
+ case 0:
32
+ standalone = NOKOGIRI_STR_NEW2("no");
33
+ break;
34
+ case 1:
35
+ standalone = NOKOGIRI_STR_NEW2("yes");
36
+ break;
37
+ }
38
+
39
+ rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
40
+ }
41
+ }
42
+
43
+ rb_funcall(doc, id_start_document, 0);
44
+ }
45
+
46
+ static void end_document(void * ctx)
47
+ {
48
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
49
+ VALUE doc = rb_iv_get(self, "@document");
50
+ rb_funcall(doc, id_end_document, 0);
51
+ }
52
+
53
+ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
54
+ {
55
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
56
+ VALUE doc = rb_iv_get(self, "@document");
57
+ VALUE attributes = rb_ary_new();
58
+ const xmlChar * attr;
59
+ int i = 0;
60
+ if(atts) {
61
+ while((attr = atts[i]) != NULL) {
62
+ const xmlChar * val = atts[i+1];
63
+ VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
64
+ rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
65
+ i+=2;
66
+ }
67
+ }
68
+
69
+ rb_funcall( doc,
70
+ id_start_element,
71
+ 2,
72
+ NOKOGIRI_STR_NEW2(name),
73
+ attributes
74
+ );
75
+ }
76
+
77
+ static void end_element(void * ctx, const xmlChar *name)
78
+ {
79
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
80
+ VALUE doc = rb_iv_get(self, "@document");
81
+ rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
82
+ }
83
+
84
+ static VALUE attributes_as_list(
85
+ VALUE self,
86
+ int nb_attributes,
87
+ const xmlChar ** attributes)
88
+ {
89
+ VALUE list = rb_ary_new2((long)nb_attributes);
90
+
91
+ VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
92
+ if (attributes) {
93
+ /* Each attribute is an array of [localname, prefix, URI, value, end] */
94
+ int i;
95
+ for (i = 0; i < nb_attributes * 5; i += 5) {
96
+ VALUE argv[4], attribute;
97
+
98
+ argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
99
+ argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
100
+ argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
101
+
102
+ /* value */
103
+ argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
104
+ (attributes[i+4] - attributes[i+3]));
105
+
106
+ attribute = rb_class_new_instance(4, argv, attr_klass);
107
+ rb_ary_push(list, attribute);
108
+ }
109
+ }
110
+
111
+ return list;
112
+ }
113
+
114
+ static void
115
+ start_element_ns (
116
+ void * ctx,
117
+ const xmlChar * localname,
118
+ const xmlChar * prefix,
119
+ const xmlChar * uri,
120
+ int nb_namespaces,
121
+ const xmlChar ** namespaces,
122
+ int nb_attributes,
123
+ int nb_defaulted,
124
+ const xmlChar ** attributes)
125
+ {
126
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
127
+ VALUE doc = rb_iv_get(self, "@document");
128
+
129
+ VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
130
+
131
+ VALUE ns_list = rb_ary_new2((long)nb_namespaces);
132
+
133
+ if (namespaces) {
134
+ int i;
135
+ for (i = 0; i < nb_namespaces * 2; i += 2)
136
+ {
137
+ rb_ary_push(ns_list,
138
+ rb_ary_new3((long)2,
139
+ RBSTR_OR_QNIL(namespaces[i + 0]),
140
+ RBSTR_OR_QNIL(namespaces[i + 1])
141
+ )
142
+ );
143
+ }
144
+ }
145
+
146
+ rb_funcall( doc,
147
+ id_start_element_namespace,
148
+ 5,
149
+ NOKOGIRI_STR_NEW2(localname),
150
+ attribute_list,
151
+ RBSTR_OR_QNIL(prefix),
152
+ RBSTR_OR_QNIL(uri),
153
+ ns_list
154
+ );
155
+ }
156
+
157
+ /**
158
+ * end_element_ns was borrowed heavily from libxml-ruby.
159
+ */
160
+ static void
161
+ end_element_ns (
162
+ void * ctx,
163
+ const xmlChar * localname,
164
+ const xmlChar * prefix,
165
+ const xmlChar * uri)
166
+ {
167
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
168
+ VALUE doc = rb_iv_get(self, "@document");
169
+
170
+ rb_funcall(doc, id_end_element_namespace, 3,
171
+ NOKOGIRI_STR_NEW2(localname),
172
+ RBSTR_OR_QNIL(prefix),
173
+ RBSTR_OR_QNIL(uri)
174
+ );
175
+ }
176
+
177
+ static void characters_func(void * ctx, const xmlChar * ch, int len)
178
+ {
179
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
180
+ VALUE doc = rb_iv_get(self, "@document");
181
+ VALUE str = NOKOGIRI_STR_NEW(ch, len);
182
+ rb_funcall(doc, id_characters, 1, str);
183
+ }
184
+
185
+ static void comment_func(void * ctx, const xmlChar * value)
186
+ {
187
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
188
+ VALUE doc = rb_iv_get(self, "@document");
189
+ VALUE str = NOKOGIRI_STR_NEW2(value);
190
+ rb_funcall(doc, id_comment, 1, str);
191
+ }
192
+
193
+ static void warning_func(void * ctx, const char *msg, ...)
194
+ {
195
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
196
+ VALUE doc = rb_iv_get(self, "@document");
197
+ char * message;
198
+ VALUE ruby_message;
199
+
200
+ va_list args;
201
+ va_start(args, msg);
202
+ vasprintf(&message, msg, args);
203
+ va_end(args);
204
+
205
+ ruby_message = NOKOGIRI_STR_NEW2(message);
206
+ free(message);
207
+ rb_funcall(doc, id_warning, 1, ruby_message);
208
+ }
209
+
210
+ static void error_func(void * ctx, const char *msg, ...)
211
+ {
212
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
213
+ VALUE doc = rb_iv_get(self, "@document");
214
+ char * message;
215
+ VALUE ruby_message;
216
+
217
+ va_list args;
218
+ va_start(args, msg);
219
+ vasprintf(&message, msg, args);
220
+ va_end(args);
221
+
222
+ ruby_message = NOKOGIRI_STR_NEW2(message);
223
+ free(message);
224
+ rb_funcall(doc, id_error, 1, ruby_message);
225
+ }
226
+
227
+ static void cdata_block(void * ctx, const xmlChar * value, int len)
228
+ {
229
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
230
+ VALUE doc = rb_iv_get(self, "@document");
231
+ VALUE string = NOKOGIRI_STR_NEW(value, len);
232
+ rb_funcall(doc, id_cdata_block, 1, string);
233
+ }
234
+
235
+ static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
236
+ {
237
+ VALUE rb_content;
238
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
239
+ VALUE doc = rb_iv_get(self, "@document");
240
+
241
+ rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
242
+
243
+ rb_funcall( doc,
244
+ id_processing_instruction,
245
+ 2,
246
+ NOKOGIRI_STR_NEW2(name),
247
+ rb_content
248
+ );
249
+ }
250
+
251
+ static void deallocate(xmlSAXHandlerPtr handler)
252
+ {
253
+ NOKOGIRI_DEBUG_START(handler);
254
+ free(handler);
255
+ NOKOGIRI_DEBUG_END(handler);
256
+ }
257
+
258
+ static VALUE allocate(VALUE klass)
259
+ {
260
+ xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
261
+
262
+ handler->startDocument = start_document;
263
+ handler->endDocument = end_document;
264
+ handler->startElement = start_element;
265
+ handler->endElement = end_element;
266
+ handler->startElementNs = start_element_ns;
267
+ handler->endElementNs = end_element_ns;
268
+ handler->characters = characters_func;
269
+ handler->comment = comment_func;
270
+ handler->warning = warning_func;
271
+ handler->error = error_func;
272
+ handler->cdataBlock = cdata_block;
273
+ handler->processingInstruction = processing_instruction;
274
+ handler->initialized = XML_SAX2_MAGIC;
275
+
276
+ return Data_Wrap_Struct(klass, NULL, deallocate, handler);
277
+ }
278
+
279
+ VALUE cNokogiriXmlSaxParser ;
280
+ void init_xml_sax_parser()
281
+ {
282
+ VALUE nokogiri = rb_define_module("Nokogiri");
283
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
284
+ VALUE sax = rb_define_module_under(xml, "SAX");
285
+ VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
286
+
287
+ cNokogiriXmlSaxParser = klass;
288
+
289
+ rb_define_alloc_func(klass, allocate);
290
+
291
+ id_start_document = rb_intern("start_document");
292
+ id_end_document = rb_intern("end_document");
293
+ id_start_element = rb_intern("start_element");
294
+ id_end_element = rb_intern("end_element");
295
+ id_comment = rb_intern("comment");
296
+ id_characters = rb_intern("characters");
297
+ id_xmldecl = rb_intern("xmldecl");
298
+ id_error = rb_intern("error");
299
+ id_warning = rb_intern("warning");
300
+ id_cdata_block = rb_intern("cdata_block");
301
+ id_cAttribute = rb_intern("Attribute");
302
+ id_start_element_namespace = rb_intern("start_element_namespace");
303
+ id_end_element_namespace = rb_intern("end_element_namespace");
304
+ id_processing_instruction = rb_intern("processing_instruction");
305
+ }