nokogiri 1.11.0.rc3-x64-mingw32 → 1.11.3-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +168 -91
  6. data/ext/nokogiri/depend +37 -358
  7. data/ext/nokogiri/extconf.rb +581 -374
  8. data/ext/nokogiri/html_document.c +78 -82
  9. data/ext/nokogiri/html_element_description.c +84 -71
  10. data/ext/nokogiri/html_entity_lookup.c +21 -16
  11. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  12. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  13. data/ext/nokogiri/include/libexslt/exslt.h +102 -0
  14. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  15. data/ext/nokogiri/include/libexslt/exsltexports.h +140 -0
  16. data/ext/nokogiri/include/libxml2/libxml/DOCBparser.h +96 -0
  17. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  18. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  19. data/ext/nokogiri/include/libxml2/libxml/SAX.h +173 -0
  20. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +178 -0
  21. data/ext/nokogiri/include/libxml2/libxml/c14n.h +126 -0
  22. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  23. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  24. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  25. data/ext/nokogiri/include/libxml2/libxml/dict.h +79 -0
  26. data/ext/nokogiri/include/libxml2/libxml/encoding.h +245 -0
  27. data/ext/nokogiri/include/libxml2/libxml/entities.h +151 -0
  28. data/ext/nokogiri/include/libxml2/libxml/globals.h +508 -0
  29. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  30. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  31. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +163 -0
  32. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  33. data/ext/nokogiri/include/libxml2/libxml/parser.h +1241 -0
  34. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +644 -0
  35. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  36. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +217 -0
  37. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  38. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  39. data/ext/nokogiri/include/libxml2/libxml/threads.h +89 -0
  40. data/ext/nokogiri/include/libxml2/libxml/tree.h +1311 -0
  41. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  42. data/ext/nokogiri/include/libxml2/libxml/valid.h +458 -0
  43. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  44. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +366 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +945 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +153 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +224 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +151 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +485 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xpath.h +566 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +114 -0
  63. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  64. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  65. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  66. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  67. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  68. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  69. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  70. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  71. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  72. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  73. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  74. data/ext/nokogiri/include/libxslt/security.h +104 -0
  75. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  76. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  77. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  78. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  79. data/ext/nokogiri/include/libxslt/xsltInternals.h +1978 -0
  80. data/ext/nokogiri/include/libxslt/xsltconfig.h +180 -0
  81. data/ext/nokogiri/include/libxslt/xsltexports.h +142 -0
  82. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  83. data/ext/nokogiri/include/libxslt/xsltutils.h +313 -0
  84. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  85. data/ext/nokogiri/nokogiri.c +192 -93
  86. data/ext/nokogiri/nokogiri.h +177 -98
  87. data/ext/nokogiri/test_global_handlers.c +40 -0
  88. data/ext/nokogiri/xml_attr.c +15 -15
  89. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  90. data/ext/nokogiri/xml_cdata.c +13 -18
  91. data/ext/nokogiri/xml_comment.c +19 -26
  92. data/ext/nokogiri/xml_document.c +246 -188
  93. data/ext/nokogiri/xml_document_fragment.c +13 -15
  94. data/ext/nokogiri/xml_dtd.c +54 -48
  95. data/ext/nokogiri/xml_element_content.c +30 -27
  96. data/ext/nokogiri/xml_element_decl.c +22 -22
  97. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  98. data/ext/nokogiri/xml_entity_decl.c +32 -30
  99. data/ext/nokogiri/xml_entity_reference.c +16 -18
  100. data/ext/nokogiri/xml_namespace.c +56 -49
  101. data/ext/nokogiri/xml_node.c +371 -320
  102. data/ext/nokogiri/xml_node_set.c +168 -156
  103. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  104. data/ext/nokogiri/xml_reader.c +191 -157
  105. data/ext/nokogiri/xml_relax_ng.c +52 -28
  106. data/ext/nokogiri/xml_sax_parser.c +118 -118
  107. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  108. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  109. data/ext/nokogiri/xml_schema.c +95 -47
  110. data/ext/nokogiri/xml_syntax_error.c +42 -21
  111. data/ext/nokogiri/xml_text.c +13 -17
  112. data/ext/nokogiri/xml_xpath_context.c +206 -123
  113. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  114. data/lib/nokogiri.rb +3 -7
  115. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  116. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  117. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  118. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  119. data/lib/nokogiri/css/parser.rb +3 -3
  120. data/lib/nokogiri/css/parser.y +2 -2
  121. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  122. data/lib/nokogiri/extension.rb +26 -0
  123. data/lib/nokogiri/html/document.rb +12 -26
  124. data/lib/nokogiri/html/document_fragment.rb +15 -15
  125. data/lib/nokogiri/version.rb +2 -149
  126. data/lib/nokogiri/version/constant.rb +5 -0
  127. data/lib/nokogiri/version/info.rb +205 -0
  128. data/lib/nokogiri/xml/document.rb +91 -35
  129. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  130. data/lib/nokogiri/xml/node.rb +89 -69
  131. data/lib/nokogiri/xml/parse_options.rb +6 -0
  132. data/lib/nokogiri/xml/reader.rb +2 -9
  133. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  134. data/lib/nokogiri/xml/schema.rb +12 -4
  135. data/lib/nokogiri/xml/searchable.rb +3 -1
  136. data/lib/nokogiri/xml/xpath.rb +1 -3
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  138. metadata +150 -171
  139. data/ext/nokogiri/html_document.h +0 -10
  140. data/ext/nokogiri/html_element_description.h +0 -10
  141. data/ext/nokogiri/html_entity_lookup.h +0 -8
  142. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  143. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  144. data/ext/nokogiri/xml_attr.h +0 -9
  145. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  146. data/ext/nokogiri/xml_cdata.h +0 -9
  147. data/ext/nokogiri/xml_comment.h +0 -9
  148. data/ext/nokogiri/xml_document.h +0 -23
  149. data/ext/nokogiri/xml_document_fragment.h +0 -10
  150. data/ext/nokogiri/xml_dtd.h +0 -10
  151. data/ext/nokogiri/xml_element_content.h +0 -10
  152. data/ext/nokogiri/xml_element_decl.h +0 -9
  153. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  154. data/ext/nokogiri/xml_entity_decl.h +0 -10
  155. data/ext/nokogiri/xml_entity_reference.h +0 -9
  156. data/ext/nokogiri/xml_io.c +0 -63
  157. data/ext/nokogiri/xml_io.h +0 -11
  158. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  159. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  160. data/ext/nokogiri/xml_namespace.h +0 -14
  161. data/ext/nokogiri/xml_node.h +0 -13
  162. data/ext/nokogiri/xml_node_set.h +0 -12
  163. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  164. data/ext/nokogiri/xml_reader.h +0 -10
  165. data/ext/nokogiri/xml_relax_ng.h +0 -9
  166. data/ext/nokogiri/xml_sax_parser.h +0 -39
  167. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  168. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  169. data/ext/nokogiri/xml_schema.h +0 -9
  170. data/ext/nokogiri/xml_syntax_error.h +0 -13
  171. data/ext/nokogiri/xml_text.h +0 -9
  172. data/ext/nokogiri/xml_xpath_context.h +0 -10
  173. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  174. data/lib/nokogiri/2.4/nokogiri.so +0 -0
@@ -0,0 +1,217 @@
1
+ /*
2
+ * Summary: Tree debugging APIs
3
+ * Description: Interfaces to a set of routines used for debugging the tree
4
+ * produced by the XML parser.
5
+ *
6
+ * Copy: See Copyright for the status of this software.
7
+ *
8
+ * Author: Daniel Veillard
9
+ */
10
+
11
+ #ifndef __DEBUG_XML__
12
+ #define __DEBUG_XML__
13
+ #include <stdio.h>
14
+ #include <libxml/xmlversion.h>
15
+ #include <libxml/tree.h>
16
+
17
+ #ifdef LIBXML_DEBUG_ENABLED
18
+
19
+ #include <libxml/xpath.h>
20
+
21
+ #ifdef __cplusplus
22
+ extern "C" {
23
+ #endif
24
+
25
+ /*
26
+ * The standard Dump routines.
27
+ */
28
+ XMLPUBFUN void XMLCALL
29
+ xmlDebugDumpString (FILE *output,
30
+ const xmlChar *str);
31
+ XMLPUBFUN void XMLCALL
32
+ xmlDebugDumpAttr (FILE *output,
33
+ xmlAttrPtr attr,
34
+ int depth);
35
+ XMLPUBFUN void XMLCALL
36
+ xmlDebugDumpAttrList (FILE *output,
37
+ xmlAttrPtr attr,
38
+ int depth);
39
+ XMLPUBFUN void XMLCALL
40
+ xmlDebugDumpOneNode (FILE *output,
41
+ xmlNodePtr node,
42
+ int depth);
43
+ XMLPUBFUN void XMLCALL
44
+ xmlDebugDumpNode (FILE *output,
45
+ xmlNodePtr node,
46
+ int depth);
47
+ XMLPUBFUN void XMLCALL
48
+ xmlDebugDumpNodeList (FILE *output,
49
+ xmlNodePtr node,
50
+ int depth);
51
+ XMLPUBFUN void XMLCALL
52
+ xmlDebugDumpDocumentHead(FILE *output,
53
+ xmlDocPtr doc);
54
+ XMLPUBFUN void XMLCALL
55
+ xmlDebugDumpDocument (FILE *output,
56
+ xmlDocPtr doc);
57
+ XMLPUBFUN void XMLCALL
58
+ xmlDebugDumpDTD (FILE *output,
59
+ xmlDtdPtr dtd);
60
+ XMLPUBFUN void XMLCALL
61
+ xmlDebugDumpEntities (FILE *output,
62
+ xmlDocPtr doc);
63
+
64
+ /****************************************************************
65
+ * *
66
+ * Checking routines *
67
+ * *
68
+ ****************************************************************/
69
+
70
+ XMLPUBFUN int XMLCALL
71
+ xmlDebugCheckDocument (FILE * output,
72
+ xmlDocPtr doc);
73
+
74
+ /****************************************************************
75
+ * *
76
+ * XML shell helpers *
77
+ * *
78
+ ****************************************************************/
79
+
80
+ XMLPUBFUN void XMLCALL
81
+ xmlLsOneNode (FILE *output, xmlNodePtr node);
82
+ XMLPUBFUN int XMLCALL
83
+ xmlLsCountNode (xmlNodePtr node);
84
+
85
+ XMLPUBFUN const char * XMLCALL
86
+ xmlBoolToText (int boolval);
87
+
88
+ /****************************************************************
89
+ * *
90
+ * The XML shell related structures and functions *
91
+ * *
92
+ ****************************************************************/
93
+
94
+ #ifdef LIBXML_XPATH_ENABLED
95
+ /**
96
+ * xmlShellReadlineFunc:
97
+ * @prompt: a string prompt
98
+ *
99
+ * This is a generic signature for the XML shell input function.
100
+ *
101
+ * Returns a string which will be freed by the Shell.
102
+ */
103
+ typedef char * (* xmlShellReadlineFunc)(char *prompt);
104
+
105
+ /**
106
+ * xmlShellCtxt:
107
+ *
108
+ * A debugging shell context.
109
+ * TODO: add the defined function tables.
110
+ */
111
+ typedef struct _xmlShellCtxt xmlShellCtxt;
111
+ typedef xmlShellCtxt *xmlShellCtxtPtr;
112
+ struct _xmlShellCtxt {
113
+ char *filename; /* NOTE(review): presumably the filename given to xmlShell() - confirm */
114
+ xmlDocPtr doc; /* NOTE(review): presumably the document given to xmlShell() - confirm */
115
+ xmlNodePtr node; /* NOTE(review): likely the shell's current node - confirm */
116
+ xmlXPathContextPtr pctxt; /* an XPath context; how it is used is not visible in this header */
117
+ int loaded; /* NOTE(review): meaning not visible in this header - confirm */
118
+ FILE *output; /* NOTE(review): presumably the output stream given to xmlShell() - confirm */
119
+ xmlShellReadlineFunc input; /* input callback; see xmlShellReadlineFunc for its contract */
120
+ };
122
+
123
+ /**
124
+ * xmlShellCmd:
125
+ * @ctxt: a shell context
126
+ * @arg: a string argument
127
+ * @node: a first node
128
+ * @node2: a second node
129
+ *
130
+ * This is a generic signature for the XML shell functions.
131
+ *
132
+ * Returns an int, negative returns indicating errors.
133
+ */
134
+ typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt,
135
+ char *arg,
136
+ xmlNodePtr node,
137
+ xmlNodePtr node2);
138
+
139
+ XMLPUBFUN void XMLCALL
140
+ xmlShellPrintXPathError (int errorType,
141
+ const char *arg);
142
+ XMLPUBFUN void XMLCALL
143
+ xmlShellPrintXPathResult(xmlXPathObjectPtr list);
144
+ XMLPUBFUN int XMLCALL
145
+ xmlShellList (xmlShellCtxtPtr ctxt,
146
+ char *arg,
147
+ xmlNodePtr node,
148
+ xmlNodePtr node2);
149
+ XMLPUBFUN int XMLCALL
150
+ xmlShellBase (xmlShellCtxtPtr ctxt,
151
+ char *arg,
152
+ xmlNodePtr node,
153
+ xmlNodePtr node2);
154
+ XMLPUBFUN int XMLCALL
155
+ xmlShellDir (xmlShellCtxtPtr ctxt,
156
+ char *arg,
157
+ xmlNodePtr node,
158
+ xmlNodePtr node2);
159
+ XMLPUBFUN int XMLCALL
160
+ xmlShellLoad (xmlShellCtxtPtr ctxt,
161
+ char *filename,
162
+ xmlNodePtr node,
163
+ xmlNodePtr node2);
164
+ #ifdef LIBXML_OUTPUT_ENABLED
165
+ XMLPUBFUN void XMLCALL
166
+ xmlShellPrintNode (xmlNodePtr node);
167
+ XMLPUBFUN int XMLCALL
168
+ xmlShellCat (xmlShellCtxtPtr ctxt,
169
+ char *arg,
170
+ xmlNodePtr node,
171
+ xmlNodePtr node2);
172
+ XMLPUBFUN int XMLCALL
173
+ xmlShellWrite (xmlShellCtxtPtr ctxt,
174
+ char *filename,
175
+ xmlNodePtr node,
176
+ xmlNodePtr node2);
177
+ XMLPUBFUN int XMLCALL
178
+ xmlShellSave (xmlShellCtxtPtr ctxt,
179
+ char *filename,
180
+ xmlNodePtr node,
181
+ xmlNodePtr node2);
182
+ #endif /* LIBXML_OUTPUT_ENABLED */
183
+ #ifdef LIBXML_VALID_ENABLED
184
+ XMLPUBFUN int XMLCALL
185
+ xmlShellValidate (xmlShellCtxtPtr ctxt,
186
+ char *dtd,
187
+ xmlNodePtr node,
188
+ xmlNodePtr node2);
189
+ #endif /* LIBXML_VALID_ENABLED */
190
+ XMLPUBFUN int XMLCALL
191
+ xmlShellDu (xmlShellCtxtPtr ctxt,
192
+ char *arg,
193
+ xmlNodePtr tree,
194
+ xmlNodePtr node2);
195
+ XMLPUBFUN int XMLCALL
196
+ xmlShellPwd (xmlShellCtxtPtr ctxt,
197
+ char *buffer,
198
+ xmlNodePtr node,
199
+ xmlNodePtr node2);
200
+
201
+ /*
202
+ * The Shell interface.
203
+ */
204
+ XMLPUBFUN void XMLCALL
205
+ xmlShell (xmlDocPtr doc,
206
+ char *filename,
207
+ xmlShellReadlineFunc input,
208
+ FILE *output);
209
+
210
+ #endif /* LIBXML_XPATH_ENABLED */
211
+
212
+ #ifdef __cplusplus
213
+ }
214
+ #endif
215
+
216
+ #endif /* LIBXML_DEBUG_ENABLED */
217
+ #endif /* __DEBUG_XML__ */
@@ -0,0 +1,79 @@
1
+ /*
2
+ * Summary: string dictionary
3
+ * Description: dictionary of reusable strings, just used to avoid allocation
4
+ * and freeing operations.
5
+ *
6
+ * Copy: See Copyright for the status of this software.
7
+ *
8
+ * Author: Daniel Veillard
9
+ */
10
+
11
+ #ifndef __XML_DICT_H__
12
+ #define __XML_DICT_H__
13
+
14
+ #include <stddef.h>
15
+ #include <libxml/xmlversion.h>
16
+
17
+ #ifdef __cplusplus
18
+ extern "C" {
19
+ #endif
20
+
21
+ /*
22
+ * The dictionary.
23
+ */
24
+ typedef struct _xmlDict xmlDict;
25
+ typedef xmlDict *xmlDictPtr;
26
+
27
+ /*
28
+ * Initializer
29
+ */
30
+ XMLPUBFUN int XMLCALL xmlInitializeDict(void);
31
+
32
+ /*
33
+ * Constructor and destructor.
34
+ */
35
+ XMLPUBFUN xmlDictPtr XMLCALL
36
+ xmlDictCreate (void);
37
+ XMLPUBFUN size_t XMLCALL
38
+ xmlDictSetLimit (xmlDictPtr dict,
39
+ size_t limit);
40
+ XMLPUBFUN size_t XMLCALL
41
+ xmlDictGetUsage (xmlDictPtr dict);
42
+ XMLPUBFUN xmlDictPtr XMLCALL
43
+ xmlDictCreateSub(xmlDictPtr sub);
44
+ XMLPUBFUN int XMLCALL
45
+ xmlDictReference(xmlDictPtr dict);
46
+ XMLPUBFUN void XMLCALL
47
+ xmlDictFree (xmlDictPtr dict);
48
+
49
+ /*
50
+ * Lookup of entry in the dictionary.
51
+ */
52
+ XMLPUBFUN const xmlChar * XMLCALL
53
+ xmlDictLookup (xmlDictPtr dict,
54
+ const xmlChar *name,
55
+ int len);
56
+ XMLPUBFUN const xmlChar * XMLCALL
57
+ xmlDictExists (xmlDictPtr dict,
58
+ const xmlChar *name,
59
+ int len);
60
+ XMLPUBFUN const xmlChar * XMLCALL
61
+ xmlDictQLookup (xmlDictPtr dict,
62
+ const xmlChar *prefix,
63
+ const xmlChar *name);
64
+ XMLPUBFUN int XMLCALL
65
+ xmlDictOwns (xmlDictPtr dict,
66
+ const xmlChar *str);
67
+ XMLPUBFUN int XMLCALL
68
+ xmlDictSize (xmlDictPtr dict);
69
+
70
+ /*
71
+ * Cleanup function
72
+ */
73
+ XMLPUBFUN void XMLCALL
74
+ xmlDictCleanup (void);
75
+
76
+ #ifdef __cplusplus
77
+ }
78
+ #endif
79
+ #endif /* ! __XML_DICT_H__ */
@@ -0,0 +1,245 @@
1
+ /*
2
+ * Summary: interface for the encoding conversion functions
3
+ * Description: interface for the encoding conversion functions needed for
4
+ * XML basic encoding and iconv() support.
5
+ *
6
+ * Related specs are
7
+ * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
8
+ * [ISO-10646] UTF-8 and UTF-16 in Annexes
9
+ * [ISO-8859-1] ISO Latin-1 characters codes.
10
+ * [UNICODE] The Unicode Consortium, "The Unicode Standard --
11
+ * Worldwide Character Encoding -- Version 1.0", Addison-
12
+ * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
13
+ * described in Unicode Technical Report #4.
14
+ * [US-ASCII] Coded Character Set--7-bit American Standard Code for
15
+ * Information Interchange, ANSI X3.4-1986.
16
+ *
17
+ * Copy: See Copyright for the status of this software.
18
+ *
19
+ * Author: Daniel Veillard
20
+ */
21
+
22
+ #ifndef __XML_CHAR_ENCODING_H__
23
+ #define __XML_CHAR_ENCODING_H__
24
+
25
+ #include <libxml/xmlversion.h>
26
+
27
+ #ifdef LIBXML_ICONV_ENABLED
28
+ #include <iconv.h>
29
+ #endif
30
+ #ifdef LIBXML_ICU_ENABLED
31
+ #include <unicode/ucnv.h>
32
+ #endif
33
+ #ifdef __cplusplus
34
+ extern "C" {
35
+ #endif
36
+
37
+ /*
38
+ * xmlCharEncoding:
39
+ *
40
+ * Predefined values for some standard encodings.
41
+ * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
42
+ * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
43
+ *
44
+ * Anything else would have to be translated to UTF8 before being
45
+ * given to the parser itself. The BOM for UTF16 and the encoding
46
+ * declaration are looked at and a converter is looked for at that
47
+ * point. If not found the parser stops here as asked by the XML REC. A
48
+ * converter can be registered by the user using xmlRegisterCharEncodingHandler
49
+ * but the current form doesn't allow stateful transcoding (a serious
50
+ * problem agreed !). If iconv has been found it will be used
51
+ * automatically and allow stateful transcoding, the simplest is then
52
+ * to be sure to enable iconv and to provide iconv libs for the encoding
53
+ * support needed.
54
+ *
55
+ * Note that the generic "UTF-16" is not a predefined value. Instead, only
56
+ * the specific UTF-16LE and UTF-16BE are present.
57
+ */
58
+ typedef enum {
59
+ XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
60
+ XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */
61
+ XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */
62
+ XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */
63
+ XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */
64
+ XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */
65
+ XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */
66
+ XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */
67
+ XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
68
+ XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
69
+ XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */
70
+ XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */
71
+ XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */
72
+ XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */
73
+ XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */
74
+ XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */
75
+ XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */
76
+ XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */
77
+ XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */
78
+ XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */
79
+ XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */
80
+ XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
81
+ XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */
82
+ XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */
83
+ } xmlCharEncoding;
84
+
85
+ /**
86
+ * xmlCharEncodingInputFunc:
87
+ * @out: a pointer to an array of bytes to store the UTF-8 result
88
+ * @outlen: the length of @out
89
+ * @in: a pointer to an array of chars in the original encoding
90
+ * @inlen: the length of @in
91
+ *
92
+ * Take a block of chars in the original encoding and try to convert
93
+ * it to an UTF-8 block of chars out.
94
+ *
95
+ * Returns the number of bytes written, -1 if lack of space, or -2
96
+ * if the transcoding failed.
97
+ * The value of @inlen after return is the number of octets consumed
98
+ * if the return value is positive, else unpredictable.
99
+ * The value of @outlen after return is the number of octets produced.
100
+ */
101
+ typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
102
+ const unsigned char *in, int *inlen);
103
+
104
+
105
+ /**
106
+ * xmlCharEncodingOutputFunc:
107
+ * @out: a pointer to an array of bytes to store the result
108
+ * @outlen: the length of @out
109
+ * @in: a pointer to an array of UTF-8 chars
110
+ * @inlen: the length of @in
111
+ *
112
+ * Take a block of UTF-8 chars in and try to convert it to another
113
+ * encoding.
114
+ * Note: a first call designed to produce heading info is called with
115
+ * in = NULL. If stateful this should also initialize the encoder state.
116
+ *
117
+ * Returns the number of bytes written, -1 if lack of space, or -2
118
+ * if the transcoding failed.
119
+ * The value of @inlen after return is the number of octets consumed
120
+ * if the return value is positive, else unpredictable.
121
+ * The value of @outlen after return is the number of octets produced.
122
+ */
123
+ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
124
+ const unsigned char *in, int *inlen);
125
+
126
+
127
+ /*
128
+ * Block defining the handlers for non UTF-8 encodings.
129
+ * If iconv is supported, there are two extra fields.
130
+ */
131
+ #ifdef LIBXML_ICU_ENABLED
132
+ /* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
133
+ #define ICU_PIVOT_BUF_SIZE 1024
134
+ struct _uconv_t {
135
+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */
136
+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
137
+ UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
138
+ UChar *pivot_source;
139
+ UChar *pivot_target;
140
+ };
141
+ typedef struct _uconv_t uconv_t;
142
+ #endif
143
+
144
+ typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
145
+ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
146
+ struct _xmlCharEncodingHandler {
147
+ char *name; /* NOTE(review): presumably the name of the handled encoding - confirm */
148
+ xmlCharEncodingInputFunc input; /* converts from the original encoding to UTF-8 */
149
+ xmlCharEncodingOutputFunc output; /* converts from UTF-8 to another encoding */
150
+ #ifdef LIBXML_ICONV_ENABLED
151
+ iconv_t iconv_in; /* iconv field, present only when LIBXML_ICONV_ENABLED is defined */
152
+ iconv_t iconv_out; /* iconv field, present only when LIBXML_ICONV_ENABLED is defined */
153
+ #endif /* LIBXML_ICONV_ENABLED */
154
+ #ifdef LIBXML_ICU_ENABLED
155
+ uconv_t *uconv_in; /* ICU converter state, present only when LIBXML_ICU_ENABLED is defined */
156
+ uconv_t *uconv_out; /* ICU converter state, present only when LIBXML_ICU_ENABLED is defined */
157
+ #endif /* LIBXML_ICU_ENABLED */
158
+ };
159
+
160
+ #ifdef __cplusplus
161
+ }
162
+ #endif
163
+ #include <libxml/tree.h>
164
+ #ifdef __cplusplus
165
+ extern "C" {
166
+ #endif
167
+
168
+ /*
169
+ * Interfaces for encoding handlers.
170
+ */
171
+ XMLPUBFUN void XMLCALL
172
+ xmlInitCharEncodingHandlers (void);
173
+ XMLPUBFUN void XMLCALL
174
+ xmlCleanupCharEncodingHandlers (void);
175
+ XMLPUBFUN void XMLCALL
176
+ xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
177
+ XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
178
+ xmlGetCharEncodingHandler (xmlCharEncoding enc);
179
+ XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
180
+ xmlFindCharEncodingHandler (const char *name);
181
+ XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL
182
+ xmlNewCharEncodingHandler (const char *name,
183
+ xmlCharEncodingInputFunc input,
184
+ xmlCharEncodingOutputFunc output);
185
+
186
+ /*
187
+ * Interfaces for encoding names and aliases.
188
+ */
189
+ XMLPUBFUN int XMLCALL
190
+ xmlAddEncodingAlias (const char *name,
191
+ const char *alias);
192
+ XMLPUBFUN int XMLCALL
193
+ xmlDelEncodingAlias (const char *alias);
194
+ XMLPUBFUN const char * XMLCALL
195
+ xmlGetEncodingAlias (const char *alias);
196
+ XMLPUBFUN void XMLCALL
197
+ xmlCleanupEncodingAliases (void);
198
+ XMLPUBFUN xmlCharEncoding XMLCALL
199
+ xmlParseCharEncoding (const char *name);
200
+ XMLPUBFUN const char * XMLCALL
201
+ xmlGetCharEncodingName (xmlCharEncoding enc);
202
+
203
+ /*
204
+ * Interfaces directly used by the parsers.
205
+ */
206
+ XMLPUBFUN xmlCharEncoding XMLCALL
207
+ xmlDetectCharEncoding (const unsigned char *in,
208
+ int len);
209
+
210
+ XMLPUBFUN int XMLCALL
211
+ xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
212
+ xmlBufferPtr out,
213
+ xmlBufferPtr in);
214
+
215
+ XMLPUBFUN int XMLCALL
216
+ xmlCharEncInFunc (xmlCharEncodingHandler *handler,
217
+ xmlBufferPtr out,
218
+ xmlBufferPtr in);
219
+ XMLPUBFUN int XMLCALL
220
+ xmlCharEncFirstLine (xmlCharEncodingHandler *handler,
221
+ xmlBufferPtr out,
222
+ xmlBufferPtr in);
223
+ XMLPUBFUN int XMLCALL
224
+ xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
225
+
226
+ /*
227
+ * Export a few useful functions
228
+ */
229
+ #ifdef LIBXML_OUTPUT_ENABLED
230
+ XMLPUBFUN int XMLCALL
231
+ UTF8Toisolat1 (unsigned char *out,
232
+ int *outlen,
233
+ const unsigned char *in,
234
+ int *inlen);
235
+ #endif /* LIBXML_OUTPUT_ENABLED */
236
+ XMLPUBFUN int XMLCALL
237
+ isolat1ToUTF8 (unsigned char *out,
238
+ int *outlen,
239
+ const unsigned char *in,
240
+ int *inlen);
241
+ #ifdef __cplusplus
242
+ }
243
+ #endif
244
+
245
+ #endif /* __XML_CHAR_ENCODING_H__ */