libxml-ruby 2.9.0-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY +790 -0
  3. data/LICENSE +21 -0
  4. data/MANIFEST +166 -0
  5. data/README.rdoc +184 -0
  6. data/Rakefile +81 -0
  7. data/ext/libxml/extconf.h +4 -0
  8. data/ext/libxml/extconf.rb +57 -0
  9. data/ext/libxml/libxml.c +80 -0
  10. data/ext/libxml/libxml_ruby.def +35 -0
  11. data/ext/libxml/ruby_libxml.h +75 -0
  12. data/ext/libxml/ruby_xml.c +977 -0
  13. data/ext/libxml/ruby_xml.h +20 -0
  14. data/ext/libxml/ruby_xml_attr.c +333 -0
  15. data/ext/libxml/ruby_xml_attr.h +12 -0
  16. data/ext/libxml/ruby_xml_attr_decl.c +153 -0
  17. data/ext/libxml/ruby_xml_attr_decl.h +11 -0
  18. data/ext/libxml/ruby_xml_attributes.c +275 -0
  19. data/ext/libxml/ruby_xml_attributes.h +15 -0
  20. data/ext/libxml/ruby_xml_cbg.c +85 -0
  21. data/ext/libxml/ruby_xml_document.c +1133 -0
  22. data/ext/libxml/ruby_xml_document.h +11 -0
  23. data/ext/libxml/ruby_xml_dtd.c +261 -0
  24. data/ext/libxml/ruby_xml_dtd.h +9 -0
  25. data/ext/libxml/ruby_xml_encoding.c +262 -0
  26. data/ext/libxml/ruby_xml_encoding.h +19 -0
  27. data/ext/libxml/ruby_xml_error.c +996 -0
  28. data/ext/libxml/ruby_xml_error.h +12 -0
  29. data/ext/libxml/ruby_xml_html_parser.c +92 -0
  30. data/ext/libxml/ruby_xml_html_parser.h +10 -0
  31. data/ext/libxml/ruby_xml_html_parser_context.c +337 -0
  32. data/ext/libxml/ruby_xml_html_parser_context.h +10 -0
  33. data/ext/libxml/ruby_xml_html_parser_options.c +46 -0
  34. data/ext/libxml/ruby_xml_html_parser_options.h +10 -0
  35. data/ext/libxml/ruby_xml_input_cbg.c +191 -0
  36. data/ext/libxml/ruby_xml_input_cbg.h +20 -0
  37. data/ext/libxml/ruby_xml_io.c +52 -0
  38. data/ext/libxml/ruby_xml_io.h +10 -0
  39. data/ext/libxml/ruby_xml_namespace.c +153 -0
  40. data/ext/libxml/ruby_xml_namespace.h +10 -0
  41. data/ext/libxml/ruby_xml_namespaces.c +293 -0
  42. data/ext/libxml/ruby_xml_namespaces.h +9 -0
  43. data/ext/libxml/ruby_xml_node.c +1446 -0
  44. data/ext/libxml/ruby_xml_node.h +11 -0
  45. data/ext/libxml/ruby_xml_parser.c +94 -0
  46. data/ext/libxml/ruby_xml_parser.h +12 -0
  47. data/ext/libxml/ruby_xml_parser_context.c +999 -0
  48. data/ext/libxml/ruby_xml_parser_context.h +10 -0
  49. data/ext/libxml/ruby_xml_parser_options.c +66 -0
  50. data/ext/libxml/ruby_xml_parser_options.h +12 -0
  51. data/ext/libxml/ruby_xml_reader.c +1226 -0
  52. data/ext/libxml/ruby_xml_reader.h +17 -0
  53. data/ext/libxml/ruby_xml_relaxng.c +110 -0
  54. data/ext/libxml/ruby_xml_relaxng.h +10 -0
  55. data/ext/libxml/ruby_xml_sax2_handler.c +326 -0
  56. data/ext/libxml/ruby_xml_sax2_handler.h +10 -0
  57. data/ext/libxml/ruby_xml_sax_parser.c +120 -0
  58. data/ext/libxml/ruby_xml_sax_parser.h +10 -0
  59. data/ext/libxml/ruby_xml_schema.c +300 -0
  60. data/ext/libxml/ruby_xml_schema.h +809 -0
  61. data/ext/libxml/ruby_xml_schema_attribute.c +109 -0
  62. data/ext/libxml/ruby_xml_schema_attribute.h +15 -0
  63. data/ext/libxml/ruby_xml_schema_element.c +94 -0
  64. data/ext/libxml/ruby_xml_schema_element.h +14 -0
  65. data/ext/libxml/ruby_xml_schema_facet.c +52 -0
  66. data/ext/libxml/ruby_xml_schema_facet.h +13 -0
  67. data/ext/libxml/ruby_xml_schema_type.c +259 -0
  68. data/ext/libxml/ruby_xml_schema_type.h +9 -0
  69. data/ext/libxml/ruby_xml_version.h +9 -0
  70. data/ext/libxml/ruby_xml_writer.c +1136 -0
  71. data/ext/libxml/ruby_xml_writer.h +10 -0
  72. data/ext/libxml/ruby_xml_xinclude.c +16 -0
  73. data/ext/libxml/ruby_xml_xinclude.h +11 -0
  74. data/ext/libxml/ruby_xml_xpath.c +188 -0
  75. data/ext/libxml/ruby_xml_xpath.h +13 -0
  76. data/ext/libxml/ruby_xml_xpath_context.c +360 -0
  77. data/ext/libxml/ruby_xml_xpath_context.h +9 -0
  78. data/ext/libxml/ruby_xml_xpath_expression.c +81 -0
  79. data/ext/libxml/ruby_xml_xpath_expression.h +10 -0
  80. data/ext/libxml/ruby_xml_xpath_object.c +335 -0
  81. data/ext/libxml/ruby_xml_xpath_object.h +17 -0
  82. data/ext/libxml/ruby_xml_xpointer.c +99 -0
  83. data/ext/libxml/ruby_xml_xpointer.h +11 -0
  84. data/ext/vc/libxml_ruby.sln +26 -0
  85. data/lib/2.3/libxml_ruby.so +0 -0
  86. data/lib/libs/libiconv-2.dll +0 -0
  87. data/lib/libs/libxml2-2.dll +0 -0
  88. data/lib/libs/zlib1.dll +0 -0
  89. data/lib/libxml.rb +35 -0
  90. data/lib/libxml/attr.rb +123 -0
  91. data/lib/libxml/attr_decl.rb +80 -0
  92. data/lib/libxml/attributes.rb +14 -0
  93. data/lib/libxml/document.rb +194 -0
  94. data/lib/libxml/error.rb +95 -0
  95. data/lib/libxml/hpricot.rb +78 -0
  96. data/lib/libxml/html_parser.rb +96 -0
  97. data/lib/libxml/namespace.rb +62 -0
  98. data/lib/libxml/namespaces.rb +38 -0
  99. data/lib/libxml/node.rb +399 -0
  100. data/lib/libxml/ns.rb +22 -0
  101. data/lib/libxml/parser.rb +367 -0
  102. data/lib/libxml/properties.rb +23 -0
  103. data/lib/libxml/reader.rb +29 -0
  104. data/lib/libxml/sax_callbacks.rb +180 -0
  105. data/lib/libxml/sax_parser.rb +58 -0
  106. data/lib/libxml/schema.rb +67 -0
  107. data/lib/libxml/schema/attribute.rb +19 -0
  108. data/lib/libxml/schema/element.rb +27 -0
  109. data/lib/libxml/schema/type.rb +29 -0
  110. data/lib/libxml/tree.rb +29 -0
  111. data/lib/libxml/xpath_object.rb +16 -0
  112. data/lib/xml.rb +14 -0
  113. data/lib/xml/libxml.rb +10 -0
  114. data/libxml-ruby.gemspec +47 -0
  115. data/script/benchmark/depixelate +634 -0
  116. data/script/benchmark/hamlet.xml +9055 -0
  117. data/script/benchmark/parsecount +170 -0
  118. data/script/benchmark/sock_entries.xml +507 -0
  119. data/script/benchmark/throughput +41 -0
  120. data/script/test +6 -0
  121. data/setup.rb +1585 -0
  122. data/test/c14n/given/doc.dtd +1 -0
  123. data/test/c14n/given/example-1.xml +14 -0
  124. data/test/c14n/given/example-2.xml +11 -0
  125. data/test/c14n/given/example-3.xml +18 -0
  126. data/test/c14n/given/example-4.xml +9 -0
  127. data/test/c14n/given/example-5.xml +12 -0
  128. data/test/c14n/given/example-6.xml +2 -0
  129. data/test/c14n/given/example-7.xml +11 -0
  130. data/test/c14n/given/example-8.xml +11 -0
  131. data/test/c14n/given/example-8.xpath +10 -0
  132. data/test/c14n/given/world.txt +1 -0
  133. data/test/c14n/result/1-1-without-comments/example-1 +4 -0
  134. data/test/c14n/result/1-1-without-comments/example-2 +11 -0
  135. data/test/c14n/result/1-1-without-comments/example-3 +14 -0
  136. data/test/c14n/result/1-1-without-comments/example-4 +9 -0
  137. data/test/c14n/result/1-1-without-comments/example-5 +3 -0
  138. data/test/c14n/result/1-1-without-comments/example-6 +1 -0
  139. data/test/c14n/result/1-1-without-comments/example-7 +1 -0
  140. data/test/c14n/result/1-1-without-comments/example-8 +1 -0
  141. data/test/c14n/result/with-comments/example-1 +6 -0
  142. data/test/c14n/result/with-comments/example-2 +11 -0
  143. data/test/c14n/result/with-comments/example-3 +14 -0
  144. data/test/c14n/result/with-comments/example-4 +9 -0
  145. data/test/c14n/result/with-comments/example-5 +4 -0
  146. data/test/c14n/result/with-comments/example-6 +1 -0
  147. data/test/c14n/result/with-comments/example-7 +1 -0
  148. data/test/c14n/result/without-comments/example-1 +4 -0
  149. data/test/c14n/result/without-comments/example-2 +11 -0
  150. data/test/c14n/result/without-comments/example-3 +14 -0
  151. data/test/c14n/result/without-comments/example-4 +9 -0
  152. data/test/c14n/result/without-comments/example-5 +3 -0
  153. data/test/c14n/result/without-comments/example-6 +1 -0
  154. data/test/c14n/result/without-comments/example-7 +1 -0
  155. data/test/model/atom.xml +13 -0
  156. data/test/model/bands.iso-8859-1.xml +5 -0
  157. data/test/model/bands.utf-8.xml +5 -0
  158. data/test/model/bands.xml +5 -0
  159. data/test/model/books.xml +154 -0
  160. data/test/model/merge_bug_data.xml +58 -0
  161. data/test/model/ruby-lang.html +238 -0
  162. data/test/model/rubynet.xml +79 -0
  163. data/test/model/rubynet_project +1 -0
  164. data/test/model/shiporder.rnc +28 -0
  165. data/test/model/shiporder.rng +86 -0
  166. data/test/model/shiporder.xml +23 -0
  167. data/test/model/shiporder.xsd +40 -0
  168. data/test/model/soap.xml +27 -0
  169. data/test/model/xinclude.xml +5 -0
  170. data/test/tc_attr.rb +181 -0
  171. data/test/tc_attr_decl.rb +132 -0
  172. data/test/tc_attributes.rb +142 -0
  173. data/test/tc_canonicalize.rb +124 -0
  174. data/test/tc_deprecated_require.rb +12 -0
  175. data/test/tc_document.rb +125 -0
  176. data/test/tc_document_write.rb +195 -0
  177. data/test/tc_dtd.rb +128 -0
  178. data/test/tc_encoding.rb +126 -0
  179. data/test/tc_encoding_sax.rb +115 -0
  180. data/test/tc_error.rb +179 -0
  181. data/test/tc_html_parser.rb +161 -0
  182. data/test/tc_html_parser_context.rb +23 -0
  183. data/test/tc_namespace.rb +61 -0
  184. data/test/tc_namespaces.rb +209 -0
  185. data/test/tc_node.rb +215 -0
  186. data/test/tc_node_cdata.rb +50 -0
  187. data/test/tc_node_comment.rb +32 -0
  188. data/test/tc_node_copy.rb +41 -0
  189. data/test/tc_node_edit.rb +174 -0
  190. data/test/tc_node_pi.rb +39 -0
  191. data/test/tc_node_text.rb +70 -0
  192. data/test/tc_node_write.rb +107 -0
  193. data/test/tc_node_xlink.rb +28 -0
  194. data/test/tc_parser.rb +375 -0
  195. data/test/tc_parser_context.rb +204 -0
  196. data/test/tc_properties.rb +38 -0
  197. data/test/tc_reader.rb +399 -0
  198. data/test/tc_relaxng.rb +53 -0
  199. data/test/tc_sax_parser.rb +319 -0
  200. data/test/tc_schema.rb +161 -0
  201. data/test/tc_traversal.rb +152 -0
  202. data/test/tc_writer.rb +447 -0
  203. data/test/tc_xinclude.rb +20 -0
  204. data/test/tc_xml.rb +225 -0
  205. data/test/tc_xpath.rb +244 -0
  206. data/test/tc_xpath_context.rb +88 -0
  207. data/test/tc_xpath_expression.rb +37 -0
  208. data/test/tc_xpointer.rb +72 -0
  209. data/test/test_helper.rb +16 -0
  210. data/test/test_suite.rb +49 -0
  211. metadata +344 -0
@@ -0,0 +1,11 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #ifndef __RXML_DOCUMENT__
4
+ #define __RXML_DOCUMENT__
5
+
6
+ extern VALUE cXMLDocument;
7
+ void rxml_init_document();
8
+ VALUE rxml_document_wrap(xmlDocPtr xnode);
9
+
10
+ typedef xmlChar * xmlCharPtr;
11
+ #endif
@@ -0,0 +1,261 @@
1
+ #include "ruby_libxml.h"
2
+ #include "ruby_xml_dtd.h"
3
+
4
+ /*
5
+ * Document-class: LibXML::XML::Dtd
6
+ *
7
+ * The XML::Dtd class is used to prepare DTD's for validation of xml
8
+ * documents.
9
+ *
10
+ * DTDs can be created from a string or a pair of public and system identifiers.
11
+ * Once a Dtd object is instantiated, an XML document can be validated by the
12
+ * XML::Document#validate method providing the XML::Dtd object as parameter.
13
+ * The method will raise an exception if the document is
14
+ * not valid.
15
+ *
16
+ * Basic usage:
17
+ *
18
+ * # parse DTD
19
+ * dtd = XML::Dtd.new(<<EOF)
20
+ * <!ELEMENT root (item*) >
21
+ * <!ELEMENT item (#PCDATA) >
22
+ * EOF
23
+ *
24
+ * # parse xml document to be validated
25
+ * instance = XML::Document.file('instance.xml')
26
+ *
27
+ * # validate
28
+ * instance.validate(dtd)
29
+ */
30
+
31
+ VALUE cXMLDtd;
32
+
33
+ void rxml_dtd_free(xmlDtdPtr xdtd)
34
+ {
35
+ /* Clear our private pointer so that we won't reuse the
36
+ same, freed, Ruby wrapper object later.*/
37
+ rxml_unregister_dtd(xdtd);
38
+
39
+ if (xdtd->doc == NULL && xdtd->parent == NULL)
40
+ xmlFreeDtd(xdtd);
41
+ }
42
+
43
+ void rxml_dtd_mark(xmlDtdPtr xdtd)
44
+ {
45
+ VALUE doc = Qnil;
46
+
47
+ if (xdtd == NULL)
48
+ return;
49
+
50
+ doc = rxml_lookup_doc(xdtd->doc);
51
+ rb_gc_mark(doc);
52
+ }
53
+
54
+ static VALUE rxml_dtd_alloc(VALUE klass)
55
+ {
56
+ return Data_Wrap_Struct(klass, rxml_dtd_mark, rxml_dtd_free, NULL);
57
+ }
58
+
59
+ VALUE rxml_dtd_wrap(xmlDtdPtr xdtd)
60
+ {
61
+ VALUE result = rxml_lookup_dtd(xdtd);
62
+
63
+ // This node is already wrapped
64
+ if (result == Qnil) {
65
+ result = Data_Wrap_Struct(cXMLDtd, NULL, NULL, xdtd);
66
+ rxml_register_dtd(xdtd, result);
67
+ }
68
+ return result;
69
+ }
70
+
71
+ /*
72
+ * call-seq:
73
+ * dtd.external_id -> "string"
74
+ *
75
+ * Obtain this dtd's external identifier (for a PUBLIC DTD).
76
+ */
77
+ static VALUE rxml_dtd_external_id_get(VALUE self)
78
+ {
79
+ xmlDtdPtr xdtd;
80
+ Data_Get_Struct(self, xmlDtd, xdtd);
81
+
82
+
83
+ if (xdtd->ExternalID == NULL)
84
+ return (Qnil);
85
+ else
86
+ return (rxml_new_cstr( xdtd->ExternalID, NULL));
87
+ }
88
+
89
+ /*
90
+ * call-seq:
91
+ * dtd.name -> "string"
92
+ *
93
+ * Obtain this dtd's name.
94
+ */
95
+ static VALUE rxml_dtd_name_get(VALUE self)
96
+ {
97
+ xmlDtdPtr xdtd;
98
+ Data_Get_Struct(self, xmlDtd, xdtd);
99
+
100
+
101
+ if (xdtd->name == NULL)
102
+ return (Qnil);
103
+ else
104
+ return (rxml_new_cstr( xdtd->name, NULL));
105
+ }
106
+
107
+
108
+ /*
109
+ * call-seq:
110
+ * dtd.uri -> "string"
111
+ *
112
+ * Obtain this dtd's URI (for a SYSTEM or PUBLIC DTD).
113
+ */
114
+ static VALUE rxml_dtd_uri_get(VALUE self)
115
+ {
116
+ xmlDtdPtr xdtd;
117
+ Data_Get_Struct(self, xmlDtd, xdtd);
118
+
119
+ if (xdtd->SystemID == NULL)
120
+ return (Qnil);
121
+ else
122
+ return (rxml_new_cstr( xdtd->SystemID, NULL));
123
+ }
124
+
125
+ /*
126
+ * call-seq:
127
+ * node.type -> num
128
+ *
129
+ * Obtain this node's type identifier.
130
+ */
131
+ static VALUE rxml_dtd_type(VALUE self)
132
+ {
133
+ xmlDtdPtr xdtd;
134
+ Data_Get_Struct(self, xmlDtd, xdtd);
135
+ return (INT2NUM(xdtd->type));
136
+ }
137
+
138
+ /*
139
+ * call-seq:
140
+ * XML::Dtd.new("DTD string") -> dtd
141
+ * XML::Dtd.new("public", "system") -> dtd
142
+ * XML::Dtd.new("public", "system", "name", document) -> external subset dtd
143
+ * XML::Dtd.new("public", "system", "name", document, false) -> external subset dtd
144
+ * XML::Dtd.new("public", "system", "name", document, true) -> internal subset dtd
145
+ *
146
+ * Create a new Dtd from the specified public and system
147
+ * identifiers.
148
+ */
149
+ static VALUE rxml_dtd_initialize(int argc, VALUE *argv, VALUE self)
150
+ {
151
+ VALUE external, system, dtd_string;
152
+ xmlParserInputBufferPtr buffer;
153
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
154
+ xmlChar *new_string;
155
+ xmlDtdPtr xdtd;
156
+
157
+ // 1 argument -- string --> parsujeme jako dtd
158
+ // 2 arguments -- public, system --> bude se hledat
159
+ // 3 arguments -- public, system, name --> creates an external subset (any parameter may be nil)
160
+ // 4 arguments -- public, system, name, doc --> creates an external subset (any parameter may be nil)
161
+ // 5 arguments -- public, system, name, doc, true --> creates an internal subset (all but last parameter may be nil)
162
+ switch (argc)
163
+ {
164
+ case 3:
165
+ case 4:
166
+ case 5: {
167
+ VALUE name, doc, internal;
168
+ const xmlChar *xname = NULL, *xpublic = NULL, *xsystem = NULL;
169
+ xmlDocPtr xdoc = NULL;
170
+
171
+ rb_scan_args(argc, argv, "32", &external, &system, &name, &doc, &internal);
172
+
173
+ if (external != Qnil) {
174
+ Check_Type(external, T_STRING);
175
+ xpublic = (const xmlChar*) StringValuePtr(external);
176
+ }
177
+ if (system != Qnil) {
178
+ Check_Type(system, T_STRING);
179
+ xsystem = (const xmlChar*) StringValuePtr(system);
180
+ }
181
+ if (name != Qnil) {
182
+ Check_Type(name, T_STRING);
183
+ xname = (const xmlChar*) StringValuePtr(name);
184
+ }
185
+ if (doc != Qnil) {
186
+ if (rb_obj_is_kind_of(doc, cXMLDocument) == Qfalse)
187
+ rb_raise(rb_eTypeError, "Must pass an XML::Document object");
188
+ Data_Get_Struct(doc, xmlDoc, xdoc);
189
+ }
190
+
191
+ if (internal == Qnil || internal == Qfalse)
192
+ xdtd = xmlNewDtd(xdoc, xname, xpublic, xsystem);
193
+ else
194
+ xdtd = xmlCreateIntSubset(xdoc, xname, xpublic, xsystem);
195
+
196
+ if (xdtd == NULL)
197
+ rxml_raise(&xmlLastError);
198
+
199
+ /* Document will free this dtd now. */
200
+ RDATA(self)->dfree = NULL;
201
+ DATA_PTR(self) = xdtd;
202
+
203
+ xmlSetTreeDoc((xmlNodePtr) xdtd, xdoc);
204
+ }
205
+ break;
206
+
207
+ case 2:
208
+ rb_scan_args(argc, argv, "20", &external, &system);
209
+
210
+ Check_Type(external, T_STRING);
211
+ Check_Type(system, T_STRING);
212
+
213
+ xdtd = xmlParseDTD((xmlChar*) StringValuePtr(external),
214
+ (xmlChar*) StringValuePtr(system));
215
+
216
+ if (xdtd == NULL)
217
+ rxml_raise(&xmlLastError);
218
+
219
+ DATA_PTR(self) = xdtd;
220
+
221
+ xmlSetTreeDoc((xmlNodePtr) xdtd, NULL);
222
+ break;
223
+
224
+ case 1:
225
+ rb_scan_args(argc, argv, "10", &dtd_string);
226
+ Check_Type(dtd_string, T_STRING);
227
+
228
+ /* Note that buffer is freed by xmlParserInputBufferPush*/
229
+ buffer = xmlAllocParserInputBuffer(enc);
230
+ new_string = xmlStrdup((xmlChar*) StringValuePtr(dtd_string));
231
+ xmlParserInputBufferPush(buffer, xmlStrlen(new_string),
232
+ (const char*) new_string);
233
+
234
+ xdtd = xmlIOParseDTD(NULL, buffer, enc);
235
+
236
+ if (xdtd == NULL)
237
+ rxml_raise(&xmlLastError);
238
+
239
+ xmlFree(new_string);
240
+
241
+ DATA_PTR(self) = xdtd;
242
+ break;
243
+
244
+ default:
245
+ rb_raise(rb_eArgError, "wrong number of arguments");
246
+ }
247
+
248
+ return self;
249
+ }
250
+
251
+ void rxml_init_dtd()
252
+ {
253
+ cXMLDtd = rb_define_class_under(mXML, "Dtd", rb_cObject);
254
+ rb_define_alloc_func(cXMLDtd, rxml_dtd_alloc);
255
+ rb_define_method(cXMLDtd, "initialize", rxml_dtd_initialize, -1);
256
+ rb_define_method(cXMLDtd, "external_id", rxml_dtd_external_id_get, 0);
257
+ rb_define_method(cXMLDtd, "name", rxml_dtd_name_get, 0);
258
+ rb_define_method(cXMLDtd, "uri", rxml_dtd_uri_get, 0);
259
+ rb_define_method(cXMLDtd, "node_type", rxml_dtd_type, 0);
260
+ rb_define_alias(cXMLDtd, "system_id", "uri");
261
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef __RXML_DTD__
2
+ #define __RXML_DTD__
3
+
4
+ extern VALUE cXMLDtd;
5
+
6
+ void rxml_init_dtd(void);
7
+ VALUE rxml_dtd_wrap(xmlDtdPtr xdtd);
8
+
9
+ #endif
@@ -0,0 +1,262 @@
1
+ /* Please see the LICENSE file for copyright and distribution information */
2
+
3
+ #include <stdarg.h>
4
+ #include "ruby_libxml.h"
5
+
6
+ /*
7
+ * Document-class: LibXML::XML::Encoding
8
+ *
9
+ * The encoding class exposes the encodings that libxml
10
+ * supports via constants.
11
+ *
12
+ * LibXML converts all data sources to UTF8
13
+ * internally before processing them. By default,
14
+ * LibXML determines a data source's encoding
15
+ * using the algorithm described on its
16
+ * website[http://xmlsoft.org/encoding.html].
17
+ *
18
+ * However, you may override a data source's encoding
19
+ * by using the encoding constants defined in this
20
+ * module.
21
+ *
22
+ * Example 1:
23
+ *
24
+ * io = File.open('some_file', 'rb')
25
+ * parser = XML::Parser.io(io, :encoding => XML::Encoding::ISO_8859_1)
26
+ * doc = parser.parse
27
+ *
28
+ * Example 2:
29
+ *
30
+ * parser = XML::HTMLParser.file("some_file", :encoding => XML::Encoding::ISO_8859_1)
31
+ * doc = parser.parse
32
+ *
33
+ * Example 3:
34
+ *
35
+ * document = XML::Document.new
36
+ * document.encoding = XML::Encoding::ISO_8859_1
37
+ * document << XML::Node.new
38
+ */
39
+
40
+ VALUE mXMLEncoding;
41
+
42
+ /*
43
+ * call-seq:
44
+ * Encoding.from_s("UTF_8") -> XML::Encoding::UTF_8
45
+ *
46
+ * Converts an encoding string to an encoding constant
47
+ * defined on the XML::Encoding class.
48
+ */
49
+ static VALUE rxml_encoding_from_s(VALUE klass, VALUE encoding)
50
+ {
51
+ xmlCharEncoding xencoding;
52
+
53
+ if (encoding == Qnil)
54
+ return Qnil;
55
+
56
+ xencoding = xmlParseCharEncoding(StringValuePtr(encoding));
57
+ return INT2NUM(xencoding);
58
+ }
59
+
60
+ /*
61
+ * call-seq:
62
+ * Encoding.to_s(XML::Encoding::UTF_8) -> "UTF-8"
63
+ *
64
+ * Converts an encoding constant defined on the XML::Encoding
65
+ * class to its text representation.
66
+ */
67
+ static VALUE rxml_encoding_to_s(VALUE klass, VALUE encoding)
68
+ {
69
+ const xmlChar* xencoding = (const xmlChar*)xmlGetCharEncodingName(NUM2INT(encoding));
70
+
71
+ if (!xencoding)
72
+ return Qnil;
73
+ else
74
+ return rxml_new_cstr(xencoding, xencoding);
75
+ }
76
+
77
+ #ifdef HAVE_RUBY_ENCODING_H
78
+ /*
79
+ * Converts an xmlCharEncoding enum value into a Ruby Encoding object (available
80
+ * on Ruby 1.9.* and higher).
81
+ */
82
+ rb_encoding* rxml_xml_encoding_to_rb_encoding(VALUE klass, xmlCharEncoding xmlEncoding)
83
+ {
84
+ const char* encodingName;
85
+
86
+ switch (xmlEncoding)
87
+ {
88
+ case XML_CHAR_ENCODING_UTF8:
89
+ encodingName = "UTF-8";
90
+ break;
91
+ case XML_CHAR_ENCODING_UTF16LE:
92
+ encodingName = "UTF-16LE";
93
+ break;
94
+ case XML_CHAR_ENCODING_UTF16BE:
95
+ encodingName = "UTF-16BE";
96
+ break;
97
+ case XML_CHAR_ENCODING_UCS4LE:
98
+ encodingName = "UCS-4LE";
99
+ break;
100
+ case XML_CHAR_ENCODING_UCS4BE:
101
+ encodingName = "UCS-4BE";
102
+ break;
103
+ case XML_CHAR_ENCODING_UCS2:
104
+ encodingName = "UCS-2";
105
+ break;
106
+ case XML_CHAR_ENCODING_8859_1:
107
+ encodingName = "ISO8859-1";
108
+ break;
109
+ case XML_CHAR_ENCODING_8859_2:
110
+ encodingName = "ISO8859-2";
111
+ break;
112
+ case XML_CHAR_ENCODING_8859_3:
113
+ encodingName = "ISO8859-3";
114
+ break;
115
+ case XML_CHAR_ENCODING_8859_4:
116
+ encodingName = "ISO8859-4";
117
+ break;
118
+ case XML_CHAR_ENCODING_8859_5:
119
+ encodingName = "ISO8859-5";
120
+ break;
121
+ case XML_CHAR_ENCODING_8859_6:
122
+ encodingName = "ISO8859-6";
123
+ break;
124
+ case XML_CHAR_ENCODING_8859_7:
125
+ encodingName = "ISO8859-7";
126
+ break;
127
+ case XML_CHAR_ENCODING_8859_8:
128
+ encodingName = "ISO8859-8";
129
+ break;
130
+ case XML_CHAR_ENCODING_8859_9:
131
+ encodingName = "ISO8859-9";
132
+ break;
133
+ case XML_CHAR_ENCODING_2022_JP:
134
+ encodingName = "ISO-2022-JP";
135
+ break;
136
+ case XML_CHAR_ENCODING_SHIFT_JIS:
137
+ encodingName = "SHIFT-JIS";
138
+ break;
139
+ case XML_CHAR_ENCODING_EUC_JP:
140
+ encodingName = "EUC-JP";
141
+ break;
142
+ case XML_CHAR_ENCODING_ASCII:
143
+ encodingName = "US-ASCII";
144
+ break;
145
+ default:
146
+ /* Covers XML_CHAR_ENCODING_ERROR, XML_CHAR_ENCODING_NONE, XML_CHAR_ENCODING_EBCDIC */
147
+ encodingName = "ASCII-8BIT";
148
+ break;
149
+ }
150
+
151
+ return rb_enc_find(encodingName);
152
+ }
153
+
154
+ /*
155
+ * call-seq:
156
+ * Input.encoding_to_rb_encoding(Input::ENCODING) -> Encoding
157
+ *
158
+ * Converts an encoding constant defined on the XML::Encoding
159
+ * class to a Ruby encoding object (available on Ruby 1.9.* and higher).
160
+ */
161
+ VALUE rxml_encoding_to_rb_encoding(VALUE klass, VALUE encoding)
162
+ {
163
+ xmlCharEncoding xmlEncoding = (xmlCharEncoding)NUM2INT(encoding);
164
+ rb_encoding* rbencoding = rxml_xml_encoding_to_rb_encoding(klass, xmlEncoding);
165
+ return rb_enc_from_encoding(rbencoding);
166
+ }
167
+
168
+ rb_encoding* rxml_figure_encoding(const xmlChar* xencoding)
169
+ {
170
+ rb_encoding* result;
171
+ if (xencoding)
172
+ {
173
+ xmlCharEncoding xmlEncoding = xmlParseCharEncoding((const char*)xencoding);
174
+ result = rxml_xml_encoding_to_rb_encoding(mXMLEncoding, xmlEncoding);
175
+ }
176
+ else
177
+ {
178
+ result = rb_utf8_encoding();
179
+ }
180
+ return result;
181
+ }
182
+ #endif
183
+
184
+ VALUE rxml_new_cstr(const xmlChar* xstr, const xmlChar* xencoding)
185
+ {
186
+ #ifdef HAVE_RUBY_ENCODING_H
187
+ rb_encoding *rbencoding = rxml_figure_encoding(xencoding);
188
+ return rb_external_str_new_with_enc((const char*)xstr, strlen((const char*)xstr), rbencoding);
189
+ #else
190
+ return rb_str_new2((const char*)xstr);
191
+ #endif
192
+ }
193
+
194
+ VALUE rxml_new_cstr_len(const xmlChar* xstr, const long length, const xmlChar* xencoding)
195
+ {
196
+ #ifdef HAVE_RUBY_ENCODING_H
197
+ rb_encoding *rbencoding = rxml_figure_encoding(xencoding);
198
+ return rb_external_str_new_with_enc((const char*)xstr, length, rbencoding);
199
+ #else
200
+ return rb_str_new((const char*)xstr, length);
201
+ #endif
202
+ }
203
+
204
+ void rxml_init_encoding(void)
205
+ {
206
+ mXMLEncoding = rb_define_module_under(mXML, "Encoding");
207
+ rb_define_module_function(mXMLEncoding, "from_s", rxml_encoding_from_s, 1);
208
+ rb_define_module_function(mXMLEncoding, "to_s", rxml_encoding_to_s, 1);
209
+
210
+ #ifdef HAVE_RUBY_ENCODING_H
211
+ // rb_define_module_function(mXMLEncoding, "to_rb_encoding", rxml_encoding_to_rb_encoding, 2);
212
+ #endif
213
+
214
+ /* -1: No char encoding detected. */
215
+ rb_define_const(mXMLEncoding, "ERROR", INT2NUM(XML_CHAR_ENCODING_ERROR));
216
+ /* 0: No char encoding detected. */
217
+ rb_define_const(mXMLEncoding, "NONE", INT2NUM(XML_CHAR_ENCODING_NONE));
218
+ /* 1: UTF-8 */
219
+ rb_define_const(mXMLEncoding, "UTF_8", INT2NUM(XML_CHAR_ENCODING_UTF8));
220
+ /* 2: UTF-16 little endian. */
221
+ rb_define_const(mXMLEncoding, "UTF_16LE", INT2NUM(XML_CHAR_ENCODING_UTF16LE));
222
+ /* 3: UTF-16 big endian. */
223
+ rb_define_const(mXMLEncoding, "UTF_16BE", INT2NUM(XML_CHAR_ENCODING_UTF16BE));
224
+ /* 4: UCS-4 little endian. */
225
+ rb_define_const(mXMLEncoding, "UCS_4LE", INT2NUM(XML_CHAR_ENCODING_UCS4LE));
226
+ /* 5: UCS-4 big endian. */
227
+ rb_define_const(mXMLEncoding, "UCS_4BE", INT2NUM(XML_CHAR_ENCODING_UCS4BE));
228
+ /* 6: EBCDIC uh! */
229
+ rb_define_const(mXMLEncoding, "EBCDIC", INT2NUM(XML_CHAR_ENCODING_EBCDIC));
230
+ /* 7: UCS-4 unusual ordering. */
231
+ rb_define_const(mXMLEncoding, "UCS_4_2143", INT2NUM(XML_CHAR_ENCODING_UCS4_2143));
232
+ /* 8: UCS-4 unusual ordering. */
233
+ rb_define_const(mXMLEncoding, "UCS_4_3412", INT2NUM(XML_CHAR_ENCODING_UCS4_3412));
234
+ /* 9: UCS-2. */
235
+ rb_define_const(mXMLEncoding, "UCS_2", INT2NUM(XML_CHAR_ENCODING_UCS2));
236
+ /* 10: ISO-8859-1 ISO Latin 1. */
237
+ rb_define_const(mXMLEncoding, "ISO_8859_1", INT2NUM(XML_CHAR_ENCODING_8859_1));
238
+ /* 11: ISO-8859-2 ISO Latin 2. */
239
+ rb_define_const(mXMLEncoding, "ISO_8859_2", INT2NUM(XML_CHAR_ENCODING_8859_2));
240
+ /* 12: ISO-8859-3. */
241
+ rb_define_const(mXMLEncoding, "ISO_8859_3", INT2NUM(XML_CHAR_ENCODING_8859_3));
242
+ /* 13: ISO-8859-4. */
243
+ rb_define_const(mXMLEncoding, "ISO_8859_4", INT2NUM(XML_CHAR_ENCODING_8859_4));
244
+ /* 14: ISO-8859-5. */
245
+ rb_define_const(mXMLEncoding, "ISO_8859_5", INT2NUM(XML_CHAR_ENCODING_8859_5));
246
+ /* 15: ISO-8859-6. */
247
+ rb_define_const(mXMLEncoding, "ISO_8859_6", INT2NUM(XML_CHAR_ENCODING_8859_6));
248
+ /* 16: ISO-8859-7. */
249
+ rb_define_const(mXMLEncoding, "ISO_8859_7", INT2NUM(XML_CHAR_ENCODING_8859_7));
250
+ /* 17: ISO-8859-8. */
251
+ rb_define_const(mXMLEncoding, "ISO_8859_8", INT2NUM(XML_CHAR_ENCODING_8859_8));
252
+ /* 18: ISO-8859-9. */
253
+ rb_define_const(mXMLEncoding, "ISO_8859_9", INT2NUM(XML_CHAR_ENCODING_8859_9));
254
+ /* 19: ISO-2022-JP. */
255
+ rb_define_const(mXMLEncoding, "ISO_2022_JP", INT2NUM(XML_CHAR_ENCODING_2022_JP));
256
+ /* 20: Shift_JIS. */
257
+ rb_define_const(mXMLEncoding, "SHIFT_JIS", INT2NUM(XML_CHAR_ENCODING_SHIFT_JIS));
258
+ /* 21: EUC-JP. */
259
+ rb_define_const(mXMLEncoding, "EUC_JP", INT2NUM(XML_CHAR_ENCODING_EUC_JP));
260
+ /* 22: pure ASCII. */
261
+ rb_define_const(mXMLEncoding, "ASCII", INT2NUM(XML_CHAR_ENCODING_ASCII));
262
+ }