pocxxeci 0.30.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pocxxeci might be problematic. Click here for more details.
- package/LICENSE +19 -0
- package/Makefile +18 -0
- package/README.md +52 -0
- package/binding.gyp +81 -0
- package/index.d.ts +273 -0
- package/index.js +45 -0
- package/lib/bindings.js +1 -0
- package/lib/document.js +122 -0
- package/lib/element.js +82 -0
- package/lib/sax_parser.js +38 -0
- package/package.json +70 -0
- package/src/html_document.cc +7 -0
- package/src/html_document.h +18 -0
- package/src/libxmljs.cc +252 -0
- package/src/libxmljs.h +53 -0
- package/src/xml_attribute.cc +173 -0
- package/src/xml_attribute.h +40 -0
- package/src/xml_comment.cc +117 -0
- package/src/xml_comment.h +30 -0
- package/src/xml_document.cc +810 -0
- package/src/xml_document.h +67 -0
- package/src/xml_element.cc +565 -0
- package/src/xml_element.h +61 -0
- package/src/xml_namespace.cc +158 -0
- package/src/xml_namespace.h +39 -0
- package/src/xml_node.cc +761 -0
- package/src/xml_node.h +73 -0
- package/src/xml_pi.cc +161 -0
- package/src/xml_pi.h +34 -0
- package/src/xml_sax_parser.cc +424 -0
- package/src/xml_sax_parser.h +73 -0
- package/src/xml_syntax_error.cc +66 -0
- package/src/xml_syntax_error.h +25 -0
- package/src/xml_text.cc +320 -0
- package/src/xml_text.h +48 -0
- package/src/xml_textwriter.cc +315 -0
- package/src/xml_textwriter.h +62 -0
- package/src/xml_xpath_context.cc +70 -0
- package/src/xml_xpath_context.h +23 -0
- package/vendor/libxml/Copyright +23 -0
- package/vendor/libxml/DOCBparser.c +305 -0
- package/vendor/libxml/HTMLparser.c +7287 -0
- package/vendor/libxml/HTMLtree.c +1200 -0
- package/vendor/libxml/Makefile +2983 -0
- package/vendor/libxml/SAX.c +180 -0
- package/vendor/libxml/SAX2.c +3036 -0
- package/vendor/libxml/buf.c +1351 -0
- package/vendor/libxml/buf.h +72 -0
- package/vendor/libxml/c14n.c +2234 -0
- package/vendor/libxml/catalog.c +3828 -0
- package/vendor/libxml/chvalid.c +336 -0
- package/vendor/libxml/config.h +294 -0
- package/vendor/libxml/config.h.gch +0 -0
- package/vendor/libxml/debugXML.c +3423 -0
- package/vendor/libxml/dict.c +1298 -0
- package/vendor/libxml/elfgcchack.h +17818 -0
- package/vendor/libxml/enc.h +32 -0
- package/vendor/libxml/encoding.c +3975 -0
- package/vendor/libxml/entities.c +1163 -0
- package/vendor/libxml/error.c +998 -0
- package/vendor/libxml/globals.c +1126 -0
- package/vendor/libxml/hash.c +1146 -0
- package/vendor/libxml/include/libxml/DOCBparser.h +96 -0
- package/vendor/libxml/include/libxml/HTMLparser.h +306 -0
- package/vendor/libxml/include/libxml/HTMLtree.h +147 -0
- package/vendor/libxml/include/libxml/Makefile +725 -0
- package/vendor/libxml/include/libxml/Makefile.am +54 -0
- package/vendor/libxml/include/libxml/Makefile.in +725 -0
- package/vendor/libxml/include/libxml/SAX.h +173 -0
- package/vendor/libxml/include/libxml/SAX2.h +178 -0
- package/vendor/libxml/include/libxml/c14n.h +128 -0
- package/vendor/libxml/include/libxml/catalog.h +182 -0
- package/vendor/libxml/include/libxml/chvalid.h +230 -0
- package/vendor/libxml/include/libxml/debugXML.h +217 -0
- package/vendor/libxml/include/libxml/dict.h +79 -0
- package/vendor/libxml/include/libxml/encoding.h +245 -0
- package/vendor/libxml/include/libxml/entities.h +151 -0
- package/vendor/libxml/include/libxml/globals.h +508 -0
- package/vendor/libxml/include/libxml/hash.h +236 -0
- package/vendor/libxml/include/libxml/list.h +137 -0
- package/vendor/libxml/include/libxml/nanoftp.h +163 -0
- package/vendor/libxml/include/libxml/nanohttp.h +81 -0
- package/vendor/libxml/include/libxml/parser.h +1243 -0
- package/vendor/libxml/include/libxml/parserInternals.h +644 -0
- package/vendor/libxml/include/libxml/pattern.h +100 -0
- package/vendor/libxml/include/libxml/relaxng.h +217 -0
- package/vendor/libxml/include/libxml/schemasInternals.h +958 -0
- package/vendor/libxml/include/libxml/schematron.h +142 -0
- package/vendor/libxml/include/libxml/threads.h +89 -0
- package/vendor/libxml/include/libxml/tree.h +1311 -0
- package/vendor/libxml/include/libxml/uri.h +94 -0
- package/vendor/libxml/include/libxml/valid.h +458 -0
- package/vendor/libxml/include/libxml/xinclude.h +129 -0
- package/vendor/libxml/include/libxml/xlink.h +189 -0
- package/vendor/libxml/include/libxml/xmlIO.h +368 -0
- package/vendor/libxml/include/libxml/xmlautomata.h +146 -0
- package/vendor/libxml/include/libxml/xmlerror.h +945 -0
- package/vendor/libxml/include/libxml/xmlexports.h +77 -0
- package/vendor/libxml/include/libxml/xmlmemory.h +224 -0
- package/vendor/libxml/include/libxml/xmlmodule.h +57 -0
- package/vendor/libxml/include/libxml/xmlreader.h +428 -0
- package/vendor/libxml/include/libxml/xmlregexp.h +222 -0
- package/vendor/libxml/include/libxml/xmlsave.h +88 -0
- package/vendor/libxml/include/libxml/xmlschemas.h +246 -0
- package/vendor/libxml/include/libxml/xmlschemastypes.h +151 -0
- package/vendor/libxml/include/libxml/xmlstring.h +140 -0
- package/vendor/libxml/include/libxml/xmlunicode.h +202 -0
- package/vendor/libxml/include/libxml/xmlversion.h +484 -0
- package/vendor/libxml/include/libxml/xmlwin32version.h +239 -0
- package/vendor/libxml/include/libxml/xmlwriter.h +488 -0
- package/vendor/libxml/include/libxml/xpath.h +564 -0
- package/vendor/libxml/include/libxml/xpathInternals.h +632 -0
- package/vendor/libxml/include/libxml/xpointer.h +114 -0
- package/vendor/libxml/include/win32config.h +122 -0
- package/vendor/libxml/include/wsockcompat.h +54 -0
- package/vendor/libxml/legacy.c +1343 -0
- package/vendor/libxml/libxml.h +134 -0
- package/vendor/libxml/list.c +779 -0
- package/vendor/libxml/nanoftp.c +2118 -0
- package/vendor/libxml/nanohttp.c +1899 -0
- package/vendor/libxml/parser.c +15553 -0
- package/vendor/libxml/parserInternals.c +2164 -0
- package/vendor/libxml/pattern.c +2621 -0
- package/vendor/libxml/relaxng.c +11101 -0
- package/vendor/libxml/rngparser.c +1595 -0
- package/vendor/libxml/runsuite.c +1157 -0
- package/vendor/libxml/save.h +36 -0
- package/vendor/libxml/schematron.c +1787 -0
- package/vendor/libxml/threads.c +1049 -0
- package/vendor/libxml/timsort.h +601 -0
- package/vendor/libxml/tree.c +10183 -0
- package/vendor/libxml/trio.c +6895 -0
- package/vendor/libxml/trio.h +230 -0
- package/vendor/libxml/triodef.h +228 -0
- package/vendor/libxml/trionan.c +914 -0
- package/vendor/libxml/trionan.h +84 -0
- package/vendor/libxml/triop.h +150 -0
- package/vendor/libxml/triostr.c +2112 -0
- package/vendor/libxml/triostr.h +144 -0
- package/vendor/libxml/uri.c +2561 -0
- package/vendor/libxml/valid.c +7138 -0
- package/vendor/libxml/xinclude.c +2657 -0
- package/vendor/libxml/xlink.c +183 -0
- package/vendor/libxml/xmlIO.c +4135 -0
- package/vendor/libxml/xmlcatalog.c +624 -0
- package/vendor/libxml/xmllint.c +3796 -0
- package/vendor/libxml/xmlmemory.c +1163 -0
- package/vendor/libxml/xmlmodule.c +468 -0
- package/vendor/libxml/xmlreader.c +6033 -0
- package/vendor/libxml/xmlregexp.c +8271 -0
- package/vendor/libxml/xmlsave.c +2735 -0
- package/vendor/libxml/xmlschemas.c +29173 -0
- package/vendor/libxml/xmlschemastypes.c +6276 -0
- package/vendor/libxml/xmlstring.c +1050 -0
- package/vendor/libxml/xmlunicode.c +3179 -0
- package/vendor/libxml/xmlwriter.c +4738 -0
- package/vendor/libxml/xpath.c +14734 -0
- package/vendor/libxml/xpointer.c +2969 -0
- package/vendor/libxml/xzlib.c +815 -0
- package/vendor/libxml/xzlib.h +19 -0
@@ -0,0 +1,1200 @@
|
|
1
|
+
/*
|
2
|
+
* HTMLtree.c : implementation of access function for an HTML tree.
|
3
|
+
*
|
4
|
+
* See Copyright for the status of this software.
|
5
|
+
*
|
6
|
+
* daniel@veillard.com
|
7
|
+
*/
|
8
|
+
|
9
|
+
|
10
|
+
#define IN_LIBXML
|
11
|
+
#include "libxml.h"
|
12
|
+
#ifdef LIBXML_HTML_ENABLED
|
13
|
+
|
14
|
+
#include <string.h> /* for memset() only ! */
|
15
|
+
|
16
|
+
#ifdef HAVE_CTYPE_H
|
17
|
+
#include <ctype.h>
|
18
|
+
#endif
|
19
|
+
#ifdef HAVE_STDLIB_H
|
20
|
+
#include <stdlib.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include <libxml/xmlmemory.h>
|
24
|
+
#include <libxml/HTMLparser.h>
|
25
|
+
#include <libxml/HTMLtree.h>
|
26
|
+
#include <libxml/entities.h>
|
27
|
+
#include <libxml/valid.h>
|
28
|
+
#include <libxml/xmlerror.h>
|
29
|
+
#include <libxml/parserInternals.h>
|
30
|
+
#include <libxml/globals.h>
|
31
|
+
#include <libxml/uri.h>
|
32
|
+
|
33
|
+
#include "buf.h"
|
34
|
+
|
35
|
+
/************************************************************************
|
36
|
+
* *
|
37
|
+
* Getting/Setting encoding meta tags *
|
38
|
+
* *
|
39
|
+
************************************************************************/
|
40
|
+
|
41
|
+
/**
|
42
|
+
* htmlGetMetaEncoding:
|
43
|
+
* @doc: the document
|
44
|
+
*
|
45
|
+
* Encoding definition lookup in the Meta tags
|
46
|
+
*
|
47
|
+
* Returns the current encoding as flagged in the HTML source
|
48
|
+
*/
|
49
|
+
const xmlChar *
|
50
|
+
htmlGetMetaEncoding(htmlDocPtr doc) {
|
51
|
+
htmlNodePtr cur;
|
52
|
+
const xmlChar *content;
|
53
|
+
const xmlChar *encoding;
|
54
|
+
|
55
|
+
if (doc == NULL)
|
56
|
+
return(NULL);
|
57
|
+
cur = doc->children;
|
58
|
+
|
59
|
+
/*
|
60
|
+
* Search the html
|
61
|
+
*/
|
62
|
+
while (cur != NULL) {
|
63
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
64
|
+
if (xmlStrEqual(cur->name, BAD_CAST"html"))
|
65
|
+
break;
|
66
|
+
if (xmlStrEqual(cur->name, BAD_CAST"head"))
|
67
|
+
goto found_head;
|
68
|
+
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
|
69
|
+
goto found_meta;
|
70
|
+
}
|
71
|
+
cur = cur->next;
|
72
|
+
}
|
73
|
+
if (cur == NULL)
|
74
|
+
return(NULL);
|
75
|
+
cur = cur->children;
|
76
|
+
|
77
|
+
/*
|
78
|
+
* Search the head
|
79
|
+
*/
|
80
|
+
while (cur != NULL) {
|
81
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
82
|
+
if (xmlStrEqual(cur->name, BAD_CAST"head"))
|
83
|
+
break;
|
84
|
+
if (xmlStrEqual(cur->name, BAD_CAST"meta"))
|
85
|
+
goto found_meta;
|
86
|
+
}
|
87
|
+
cur = cur->next;
|
88
|
+
}
|
89
|
+
if (cur == NULL)
|
90
|
+
return(NULL);
|
91
|
+
found_head:
|
92
|
+
cur = cur->children;
|
93
|
+
|
94
|
+
/*
|
95
|
+
* Search the meta elements
|
96
|
+
*/
|
97
|
+
found_meta:
|
98
|
+
while (cur != NULL) {
|
99
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
100
|
+
if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
|
101
|
+
xmlAttrPtr attr = cur->properties;
|
102
|
+
int http;
|
103
|
+
const xmlChar *value;
|
104
|
+
|
105
|
+
content = NULL;
|
106
|
+
http = 0;
|
107
|
+
while (attr != NULL) {
|
108
|
+
if ((attr->children != NULL) &&
|
109
|
+
(attr->children->type == XML_TEXT_NODE) &&
|
110
|
+
(attr->children->next == NULL)) {
|
111
|
+
value = attr->children->content;
|
112
|
+
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
|
113
|
+
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
|
114
|
+
http = 1;
|
115
|
+
else if ((value != NULL)
|
116
|
+
&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
|
117
|
+
content = value;
|
118
|
+
if ((http != 0) && (content != NULL))
|
119
|
+
goto found_content;
|
120
|
+
}
|
121
|
+
attr = attr->next;
|
122
|
+
}
|
123
|
+
}
|
124
|
+
}
|
125
|
+
cur = cur->next;
|
126
|
+
}
|
127
|
+
return(NULL);
|
128
|
+
|
129
|
+
found_content:
|
130
|
+
encoding = xmlStrstr(content, BAD_CAST"charset=");
|
131
|
+
if (encoding == NULL)
|
132
|
+
encoding = xmlStrstr(content, BAD_CAST"Charset=");
|
133
|
+
if (encoding == NULL)
|
134
|
+
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
|
135
|
+
if (encoding != NULL) {
|
136
|
+
encoding += 8;
|
137
|
+
} else {
|
138
|
+
encoding = xmlStrstr(content, BAD_CAST"charset =");
|
139
|
+
if (encoding == NULL)
|
140
|
+
encoding = xmlStrstr(content, BAD_CAST"Charset =");
|
141
|
+
if (encoding == NULL)
|
142
|
+
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
|
143
|
+
if (encoding != NULL)
|
144
|
+
encoding += 9;
|
145
|
+
}
|
146
|
+
if (encoding != NULL) {
|
147
|
+
while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
|
148
|
+
}
|
149
|
+
return(encoding);
|
150
|
+
}
|
151
|
+
|
152
|
+
/**
|
153
|
+
* htmlSetMetaEncoding:
|
154
|
+
* @doc: the document
|
155
|
+
* @encoding: the encoding string
|
156
|
+
*
|
157
|
+
* Sets the current encoding in the Meta tags
|
158
|
+
* NOTE: this will not change the document content encoding, just
|
159
|
+
* the META flag associated.
|
160
|
+
*
|
161
|
+
* Returns 0 in case of success and -1 in case of error
|
162
|
+
*/
|
163
|
+
int
|
164
|
+
htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
|
165
|
+
htmlNodePtr cur, meta = NULL, head = NULL;
|
166
|
+
const xmlChar *content = NULL;
|
167
|
+
char newcontent[100];
|
168
|
+
|
169
|
+
newcontent[0] = 0;
|
170
|
+
|
171
|
+
if (doc == NULL)
|
172
|
+
return(-1);
|
173
|
+
|
174
|
+
/* html isn't a real encoding it's just libxml2 way to get entities */
|
175
|
+
if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
|
176
|
+
return(-1);
|
177
|
+
|
178
|
+
if (encoding != NULL) {
|
179
|
+
snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
|
180
|
+
(char *)encoding);
|
181
|
+
newcontent[sizeof(newcontent) - 1] = 0;
|
182
|
+
}
|
183
|
+
|
184
|
+
cur = doc->children;
|
185
|
+
|
186
|
+
/*
|
187
|
+
* Search the html
|
188
|
+
*/
|
189
|
+
while (cur != NULL) {
|
190
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
191
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
|
192
|
+
break;
|
193
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
|
194
|
+
goto found_head;
|
195
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
|
196
|
+
goto found_meta;
|
197
|
+
}
|
198
|
+
cur = cur->next;
|
199
|
+
}
|
200
|
+
if (cur == NULL)
|
201
|
+
return(-1);
|
202
|
+
cur = cur->children;
|
203
|
+
|
204
|
+
/*
|
205
|
+
* Search the head
|
206
|
+
*/
|
207
|
+
while (cur != NULL) {
|
208
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
209
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
|
210
|
+
break;
|
211
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
|
212
|
+
head = cur->parent;
|
213
|
+
goto found_meta;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
cur = cur->next;
|
217
|
+
}
|
218
|
+
if (cur == NULL)
|
219
|
+
return(-1);
|
220
|
+
found_head:
|
221
|
+
head = cur;
|
222
|
+
if (cur->children == NULL)
|
223
|
+
goto create;
|
224
|
+
cur = cur->children;
|
225
|
+
|
226
|
+
found_meta:
|
227
|
+
/*
|
228
|
+
* Search and update all the remaining the meta elements carrying
|
229
|
+
* encoding information
|
230
|
+
*/
|
231
|
+
while (cur != NULL) {
|
232
|
+
if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
|
233
|
+
if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
|
234
|
+
xmlAttrPtr attr = cur->properties;
|
235
|
+
int http;
|
236
|
+
const xmlChar *value;
|
237
|
+
|
238
|
+
content = NULL;
|
239
|
+
http = 0;
|
240
|
+
while (attr != NULL) {
|
241
|
+
if ((attr->children != NULL) &&
|
242
|
+
(attr->children->type == XML_TEXT_NODE) &&
|
243
|
+
(attr->children->next == NULL)) {
|
244
|
+
value = attr->children->content;
|
245
|
+
if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
|
246
|
+
&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
|
247
|
+
http = 1;
|
248
|
+
else
|
249
|
+
{
|
250
|
+
if ((value != NULL) &&
|
251
|
+
(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
|
252
|
+
content = value;
|
253
|
+
}
|
254
|
+
if ((http != 0) && (content != NULL))
|
255
|
+
break;
|
256
|
+
}
|
257
|
+
attr = attr->next;
|
258
|
+
}
|
259
|
+
if ((http != 0) && (content != NULL)) {
|
260
|
+
meta = cur;
|
261
|
+
break;
|
262
|
+
}
|
263
|
+
|
264
|
+
}
|
265
|
+
}
|
266
|
+
cur = cur->next;
|
267
|
+
}
|
268
|
+
create:
|
269
|
+
if (meta == NULL) {
|
270
|
+
if ((encoding != NULL) && (head != NULL)) {
|
271
|
+
/*
|
272
|
+
* Create a new Meta element with the right attributes
|
273
|
+
*/
|
274
|
+
|
275
|
+
meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
|
276
|
+
if (head->children == NULL)
|
277
|
+
xmlAddChild(head, meta);
|
278
|
+
else
|
279
|
+
xmlAddPrevSibling(head->children, meta);
|
280
|
+
xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
|
281
|
+
xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
282
|
+
}
|
283
|
+
} else {
|
284
|
+
/* remove the meta tag if NULL is passed */
|
285
|
+
if (encoding == NULL) {
|
286
|
+
xmlUnlinkNode(meta);
|
287
|
+
xmlFreeNode(meta);
|
288
|
+
}
|
289
|
+
/* change the document only if there is a real encoding change */
|
290
|
+
else if (xmlStrcasestr(content, encoding) == NULL) {
|
291
|
+
xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
|
295
|
+
|
296
|
+
return(0);
|
297
|
+
}
|
298
|
+
|
299
|
+
/**
|
300
|
+
* booleanHTMLAttrs:
|
301
|
+
*
|
302
|
+
* These are the HTML attributes which will be output
|
303
|
+
* in minimized form, i.e. <option selected="selected"> will be
|
304
|
+
* output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
|
305
|
+
*
|
306
|
+
*/
|
307
|
+
static const char* htmlBooleanAttrs[] = {
|
308
|
+
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
309
|
+
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
310
|
+
"selected", NULL
|
311
|
+
};
|
312
|
+
|
313
|
+
|
314
|
+
/**
|
315
|
+
* htmlIsBooleanAttr:
|
316
|
+
* @name: the name of the attribute to check
|
317
|
+
*
|
318
|
+
* Determine if a given attribute is a boolean attribute.
|
319
|
+
*
|
320
|
+
* returns: false if the attribute is not boolean, true otherwise.
|
321
|
+
*/
|
322
|
+
int
|
323
|
+
htmlIsBooleanAttr(const xmlChar *name)
|
324
|
+
{
|
325
|
+
int i = 0;
|
326
|
+
|
327
|
+
while (htmlBooleanAttrs[i] != NULL) {
|
328
|
+
if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
|
329
|
+
return 1;
|
330
|
+
i++;
|
331
|
+
}
|
332
|
+
return 0;
|
333
|
+
}
|
334
|
+
|
335
|
+
#ifdef LIBXML_OUTPUT_ENABLED
|
336
|
+
/*
|
337
|
+
* private routine exported from xmlIO.c
|
338
|
+
*/
|
339
|
+
xmlOutputBufferPtr
|
340
|
+
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
|
341
|
+
/************************************************************************
|
342
|
+
* *
|
343
|
+
* Output error handlers *
|
344
|
+
* *
|
345
|
+
************************************************************************/
|
346
|
+
/**
|
347
|
+
* htmlSaveErrMemory:
|
348
|
+
* @extra: extra information
|
349
|
+
*
|
350
|
+
* Handle an out of memory condition
|
351
|
+
*/
|
352
|
+
static void
|
353
|
+
htmlSaveErrMemory(const char *extra)
|
354
|
+
{
|
355
|
+
__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
|
356
|
+
}
|
357
|
+
|
358
|
+
/**
|
359
|
+
* htmlSaveErr:
|
360
|
+
* @code: the error number
|
361
|
+
* @node: the location of the error.
|
362
|
+
* @extra: extra information
|
363
|
+
*
|
364
|
+
* Handle an out of memory condition
|
365
|
+
*/
|
366
|
+
static void
|
367
|
+
htmlSaveErr(int code, xmlNodePtr node, const char *extra)
|
368
|
+
{
|
369
|
+
const char *msg = NULL;
|
370
|
+
|
371
|
+
switch(code) {
|
372
|
+
case XML_SAVE_NOT_UTF8:
|
373
|
+
msg = "string is not in UTF-8\n";
|
374
|
+
break;
|
375
|
+
case XML_SAVE_CHAR_INVALID:
|
376
|
+
msg = "invalid character value\n";
|
377
|
+
break;
|
378
|
+
case XML_SAVE_UNKNOWN_ENCODING:
|
379
|
+
msg = "unknown encoding %s\n";
|
380
|
+
break;
|
381
|
+
case XML_SAVE_NO_DOCTYPE:
|
382
|
+
msg = "HTML has no DOCTYPE\n";
|
383
|
+
break;
|
384
|
+
default:
|
385
|
+
msg = "unexpected error number\n";
|
386
|
+
}
|
387
|
+
__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
|
388
|
+
}
|
389
|
+
|
390
|
+
/************************************************************************
|
391
|
+
* *
|
392
|
+
* Dumping HTML tree content to a simple buffer *
|
393
|
+
* *
|
394
|
+
************************************************************************/
|
395
|
+
|
396
|
+
/**
|
397
|
+
* htmlBufNodeDumpFormat:
|
398
|
+
* @buf: the xmlBufPtr output
|
399
|
+
* @doc: the document
|
400
|
+
* @cur: the current node
|
401
|
+
* @format: should formatting spaces been added
|
402
|
+
*
|
403
|
+
* Dump an HTML node, recursive behaviour,children are printed too.
|
404
|
+
*
|
405
|
+
* Returns the number of byte written or -1 in case of error
|
406
|
+
*/
|
407
|
+
static size_t
|
408
|
+
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
409
|
+
int format) {
|
410
|
+
size_t use;
|
411
|
+
int ret;
|
412
|
+
xmlOutputBufferPtr outbuf;
|
413
|
+
|
414
|
+
if (cur == NULL) {
|
415
|
+
return (-1);
|
416
|
+
}
|
417
|
+
if (buf == NULL) {
|
418
|
+
return (-1);
|
419
|
+
}
|
420
|
+
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
|
421
|
+
if (outbuf == NULL) {
|
422
|
+
htmlSaveErrMemory("allocating HTML output buffer");
|
423
|
+
return (-1);
|
424
|
+
}
|
425
|
+
memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
|
426
|
+
outbuf->buffer = buf;
|
427
|
+
outbuf->encoder = NULL;
|
428
|
+
outbuf->writecallback = NULL;
|
429
|
+
outbuf->closecallback = NULL;
|
430
|
+
outbuf->context = NULL;
|
431
|
+
outbuf->written = 0;
|
432
|
+
|
433
|
+
use = xmlBufUse(buf);
|
434
|
+
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
|
435
|
+
xmlFree(outbuf);
|
436
|
+
ret = xmlBufUse(buf) - use;
|
437
|
+
return (ret);
|
438
|
+
}
|
439
|
+
|
440
|
+
/**
|
441
|
+
* htmlNodeDump:
|
442
|
+
* @buf: the HTML buffer output
|
443
|
+
* @doc: the document
|
444
|
+
* @cur: the current node
|
445
|
+
*
|
446
|
+
* Dump an HTML node, recursive behaviour,children are printed too,
|
447
|
+
* and formatting returns are added.
|
448
|
+
*
|
449
|
+
* Returns the number of byte written or -1 in case of error
|
450
|
+
*/
|
451
|
+
int
|
452
|
+
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
|
453
|
+
xmlBufPtr buffer;
|
454
|
+
size_t ret;
|
455
|
+
|
456
|
+
if ((buf == NULL) || (cur == NULL))
|
457
|
+
return(-1);
|
458
|
+
|
459
|
+
xmlInitParser();
|
460
|
+
buffer = xmlBufFromBuffer(buf);
|
461
|
+
if (buffer == NULL)
|
462
|
+
return(-1);
|
463
|
+
|
464
|
+
ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
|
465
|
+
|
466
|
+
xmlBufBackToBuffer(buffer);
|
467
|
+
|
468
|
+
if (ret > INT_MAX)
|
469
|
+
return(-1);
|
470
|
+
return((int) ret);
|
471
|
+
}
|
472
|
+
|
473
|
+
/**
|
474
|
+
* htmlNodeDumpFileFormat:
|
475
|
+
* @out: the FILE pointer
|
476
|
+
* @doc: the document
|
477
|
+
* @cur: the current node
|
478
|
+
* @encoding: the document encoding
|
479
|
+
* @format: should formatting spaces been added
|
480
|
+
*
|
481
|
+
* Dump an HTML node, recursive behaviour,children are printed too.
|
482
|
+
*
|
483
|
+
* TODO: if encoding == NULL try to save in the doc encoding
|
484
|
+
*
|
485
|
+
* returns: the number of byte written or -1 in case of failure.
|
486
|
+
*/
|
487
|
+
int
|
488
|
+
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
|
489
|
+
xmlNodePtr cur, const char *encoding, int format) {
|
490
|
+
xmlOutputBufferPtr buf;
|
491
|
+
xmlCharEncodingHandlerPtr handler = NULL;
|
492
|
+
int ret;
|
493
|
+
|
494
|
+
xmlInitParser();
|
495
|
+
|
496
|
+
if (encoding != NULL) {
|
497
|
+
xmlCharEncoding enc;
|
498
|
+
|
499
|
+
enc = xmlParseCharEncoding(encoding);
|
500
|
+
if (enc != XML_CHAR_ENCODING_UTF8) {
|
501
|
+
handler = xmlFindCharEncodingHandler(encoding);
|
502
|
+
if (handler == NULL)
|
503
|
+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
504
|
+
}
|
505
|
+
} else {
|
506
|
+
/*
|
507
|
+
* Fallback to HTML or ASCII when the encoding is unspecified
|
508
|
+
*/
|
509
|
+
if (handler == NULL)
|
510
|
+
handler = xmlFindCharEncodingHandler("HTML");
|
511
|
+
if (handler == NULL)
|
512
|
+
handler = xmlFindCharEncodingHandler("ascii");
|
513
|
+
}
|
514
|
+
|
515
|
+
/*
|
516
|
+
* save the content to a temp buffer.
|
517
|
+
*/
|
518
|
+
buf = xmlOutputBufferCreateFile(out, handler);
|
519
|
+
if (buf == NULL) return(0);
|
520
|
+
|
521
|
+
htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
|
522
|
+
|
523
|
+
ret = xmlOutputBufferClose(buf);
|
524
|
+
return(ret);
|
525
|
+
}
|
526
|
+
|
527
|
+
/**
|
528
|
+
* htmlNodeDumpFile:
|
529
|
+
* @out: the FILE pointer
|
530
|
+
* @doc: the document
|
531
|
+
* @cur: the current node
|
532
|
+
*
|
533
|
+
* Dump an HTML node, recursive behaviour,children are printed too,
|
534
|
+
* and formatting returns are added.
|
535
|
+
*/
|
536
|
+
void
|
537
|
+
htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
|
538
|
+
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
|
539
|
+
}
|
540
|
+
|
541
|
+
/**
|
542
|
+
* htmlDocDumpMemoryFormat:
|
543
|
+
* @cur: the document
|
544
|
+
* @mem: OUT: the memory pointer
|
545
|
+
* @size: OUT: the memory length
|
546
|
+
* @format: should formatting spaces been added
|
547
|
+
*
|
548
|
+
* Dump an HTML document in memory and return the xmlChar * and it's size.
|
549
|
+
* It's up to the caller to free the memory.
|
550
|
+
*/
|
551
|
+
void
|
552
|
+
htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
|
553
|
+
xmlOutputBufferPtr buf;
|
554
|
+
xmlCharEncodingHandlerPtr handler = NULL;
|
555
|
+
const char *encoding;
|
556
|
+
|
557
|
+
xmlInitParser();
|
558
|
+
|
559
|
+
if ((mem == NULL) || (size == NULL))
|
560
|
+
return;
|
561
|
+
if (cur == NULL) {
|
562
|
+
*mem = NULL;
|
563
|
+
*size = 0;
|
564
|
+
return;
|
565
|
+
}
|
566
|
+
|
567
|
+
encoding = (const char *) htmlGetMetaEncoding(cur);
|
568
|
+
|
569
|
+
if (encoding != NULL) {
|
570
|
+
xmlCharEncoding enc;
|
571
|
+
|
572
|
+
enc = xmlParseCharEncoding(encoding);
|
573
|
+
if (enc != XML_CHAR_ENCODING_UTF8) {
|
574
|
+
handler = xmlFindCharEncodingHandler(encoding);
|
575
|
+
if (handler == NULL)
|
576
|
+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
577
|
+
|
578
|
+
}
|
579
|
+
} else {
|
580
|
+
/*
|
581
|
+
* Fallback to HTML or ASCII when the encoding is unspecified
|
582
|
+
*/
|
583
|
+
if (handler == NULL)
|
584
|
+
handler = xmlFindCharEncodingHandler("HTML");
|
585
|
+
if (handler == NULL)
|
586
|
+
handler = xmlFindCharEncodingHandler("ascii");
|
587
|
+
}
|
588
|
+
|
589
|
+
buf = xmlAllocOutputBufferInternal(handler);
|
590
|
+
if (buf == NULL) {
|
591
|
+
*mem = NULL;
|
592
|
+
*size = 0;
|
593
|
+
return;
|
594
|
+
}
|
595
|
+
|
596
|
+
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
|
597
|
+
|
598
|
+
xmlOutputBufferFlush(buf);
|
599
|
+
if (buf->conv != NULL) {
|
600
|
+
*size = xmlBufUse(buf->conv);
|
601
|
+
*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
|
602
|
+
} else {
|
603
|
+
*size = xmlBufUse(buf->buffer);
|
604
|
+
*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
|
605
|
+
}
|
606
|
+
(void)xmlOutputBufferClose(buf);
|
607
|
+
}
|
608
|
+
|
609
|
+
/**
|
610
|
+
* htmlDocDumpMemory:
|
611
|
+
* @cur: the document
|
612
|
+
* @mem: OUT: the memory pointer
|
613
|
+
* @size: OUT: the memory length
|
614
|
+
*
|
615
|
+
* Dump an HTML document in memory and return the xmlChar * and it's size.
|
616
|
+
* It's up to the caller to free the memory.
|
617
|
+
*/
|
618
|
+
void
|
619
|
+
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
|
620
|
+
htmlDocDumpMemoryFormat(cur, mem, size, 1);
|
621
|
+
}
|
622
|
+
|
623
|
+
|
624
|
+
/************************************************************************
|
625
|
+
* *
|
626
|
+
* Dumping HTML tree content to an I/O output buffer *
|
627
|
+
* *
|
628
|
+
************************************************************************/
|
629
|
+
|
630
|
+
void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
|
631
|
+
|
632
|
+
/**
|
633
|
+
* htmlDtdDumpOutput:
|
634
|
+
* @buf: the HTML buffer output
|
635
|
+
* @doc: the document
|
636
|
+
* @encoding: the encoding string
|
637
|
+
*
|
638
|
+
* TODO: check whether encoding is needed
|
639
|
+
*
|
640
|
+
* Dump the HTML document DTD, if any.
|
641
|
+
*/
|
642
|
+
static void
|
643
|
+
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
644
|
+
const char *encoding ATTRIBUTE_UNUSED) {
|
645
|
+
xmlDtdPtr cur = doc->intSubset;
|
646
|
+
|
647
|
+
if (cur == NULL) {
|
648
|
+
htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
|
649
|
+
return;
|
650
|
+
}
|
651
|
+
xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
|
652
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
653
|
+
if (cur->ExternalID != NULL) {
|
654
|
+
xmlOutputBufferWriteString(buf, " PUBLIC ");
|
655
|
+
xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
|
656
|
+
if (cur->SystemID != NULL) {
|
657
|
+
xmlOutputBufferWriteString(buf, " ");
|
658
|
+
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
659
|
+
}
|
660
|
+
} else if (cur->SystemID != NULL &&
|
661
|
+
xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
|
662
|
+
xmlOutputBufferWriteString(buf, " SYSTEM ");
|
663
|
+
xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
|
664
|
+
}
|
665
|
+
xmlOutputBufferWriteString(buf, ">\n");
|
666
|
+
}
|
667
|
+
|
668
|
+
/**
|
669
|
+
* htmlAttrDumpOutput:
|
670
|
+
* @buf: the HTML buffer output
|
671
|
+
* @doc: the document
|
672
|
+
* @cur: the attribute pointer
|
673
|
+
*
|
674
|
+
* Dump an HTML attribute
|
675
|
+
*/
|
676
|
+
static void
|
677
|
+
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
|
678
|
+
xmlChar *value;
|
679
|
+
|
680
|
+
/*
|
681
|
+
* The html output method should not escape a & character
|
682
|
+
* occurring in an attribute value immediately followed by
|
683
|
+
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
|
684
|
+
* This is implemented in xmlEncodeEntitiesReentrant
|
685
|
+
*/
|
686
|
+
|
687
|
+
if (cur == NULL) {
|
688
|
+
return;
|
689
|
+
}
|
690
|
+
xmlOutputBufferWriteString(buf, " ");
|
691
|
+
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
692
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
693
|
+
xmlOutputBufferWriteString(buf, ":");
|
694
|
+
}
|
695
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
696
|
+
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
|
697
|
+
value = xmlNodeListGetString(doc, cur->children, 0);
|
698
|
+
if (value) {
|
699
|
+
xmlOutputBufferWriteString(buf, "=");
|
700
|
+
if ((cur->ns == NULL) && (cur->parent != NULL) &&
|
701
|
+
(cur->parent->ns == NULL) &&
|
702
|
+
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
|
703
|
+
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
|
704
|
+
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
|
705
|
+
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
|
706
|
+
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
|
707
|
+
xmlChar *escaped;
|
708
|
+
xmlChar *tmp = value;
|
709
|
+
|
710
|
+
while (IS_BLANK_CH(*tmp)) tmp++;
|
711
|
+
|
712
|
+
/*
|
713
|
+
* the < and > have already been escaped at the entity level
|
714
|
+
* And doing so here breaks server side includes
|
715
|
+
*/
|
716
|
+
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
|
717
|
+
if (escaped != NULL) {
|
718
|
+
xmlBufWriteQuotedString(buf->buffer, escaped);
|
719
|
+
xmlFree(escaped);
|
720
|
+
} else {
|
721
|
+
xmlBufWriteQuotedString(buf->buffer, value);
|
722
|
+
}
|
723
|
+
} else {
|
724
|
+
xmlBufWriteQuotedString(buf->buffer, value);
|
725
|
+
}
|
726
|
+
xmlFree(value);
|
727
|
+
} else {
|
728
|
+
xmlOutputBufferWriteString(buf, "=\"\"");
|
729
|
+
}
|
730
|
+
}
|
731
|
+
}
|
732
|
+
|
733
|
+
/**
|
734
|
+
* htmlNodeDumpFormatOutput:
|
735
|
+
* @buf: the HTML buffer output
|
736
|
+
* @doc: the document
|
737
|
+
* @cur: the current node
|
738
|
+
* @encoding: the encoding string (unused)
|
739
|
+
* @format: should formatting spaces been added
|
740
|
+
*
|
741
|
+
* Dump an HTML node, recursive behaviour,children are printed too.
|
742
|
+
*/
|
743
|
+
void
|
744
|
+
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
745
|
+
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
|
746
|
+
int format) {
|
747
|
+
xmlNodePtr root, parent;
|
748
|
+
xmlAttrPtr attr;
|
749
|
+
const htmlElemDesc * info;
|
750
|
+
|
751
|
+
xmlInitParser();
|
752
|
+
|
753
|
+
if ((cur == NULL) || (buf == NULL)) {
|
754
|
+
return;
|
755
|
+
}
|
756
|
+
|
757
|
+
root = cur;
|
758
|
+
parent = cur->parent;
|
759
|
+
while (1) {
|
760
|
+
switch (cur->type) {
|
761
|
+
case XML_HTML_DOCUMENT_NODE:
|
762
|
+
case XML_DOCUMENT_NODE:
|
763
|
+
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
764
|
+
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
765
|
+
}
|
766
|
+
if (cur->children != NULL) {
|
767
|
+
/* Always validate cur->parent when descending. */
|
768
|
+
if (cur->parent == parent) {
|
769
|
+
parent = cur;
|
770
|
+
cur = cur->children;
|
771
|
+
continue;
|
772
|
+
}
|
773
|
+
} else {
|
774
|
+
xmlOutputBufferWriteString(buf, "\n");
|
775
|
+
}
|
776
|
+
break;
|
777
|
+
|
778
|
+
case XML_ELEMENT_NODE:
|
779
|
+
/*
|
780
|
+
* Some users like lxml are known to pass nodes with a corrupted
|
781
|
+
* tree structure. Fall back to a recursive call to handle this
|
782
|
+
* case.
|
783
|
+
*/
|
784
|
+
if ((cur->parent != parent) && (cur->children != NULL)) {
|
785
|
+
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
786
|
+
break;
|
787
|
+
}
|
788
|
+
|
789
|
+
/*
|
790
|
+
* Get specific HTML info for that node.
|
791
|
+
*/
|
792
|
+
if (cur->ns == NULL)
|
793
|
+
info = htmlTagLookup(cur->name);
|
794
|
+
else
|
795
|
+
info = NULL;
|
796
|
+
|
797
|
+
xmlOutputBufferWriteString(buf, "<");
|
798
|
+
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
799
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
800
|
+
xmlOutputBufferWriteString(buf, ":");
|
801
|
+
}
|
802
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
803
|
+
if (cur->nsDef)
|
804
|
+
xmlNsListDumpOutput(buf, cur->nsDef);
|
805
|
+
attr = cur->properties;
|
806
|
+
while (attr != NULL) {
|
807
|
+
htmlAttrDumpOutput(buf, doc, attr);
|
808
|
+
attr = attr->next;
|
809
|
+
}
|
810
|
+
|
811
|
+
if ((info != NULL) && (info->empty)) {
|
812
|
+
xmlOutputBufferWriteString(buf, ">");
|
813
|
+
} else if (cur->children == NULL) {
|
814
|
+
if ((info != NULL) && (info->saveEndTag != 0) &&
|
815
|
+
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
|
816
|
+
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
|
817
|
+
xmlOutputBufferWriteString(buf, ">");
|
818
|
+
} else {
|
819
|
+
xmlOutputBufferWriteString(buf, "></");
|
820
|
+
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
821
|
+
xmlOutputBufferWriteString(buf,
|
822
|
+
(const char *)cur->ns->prefix);
|
823
|
+
xmlOutputBufferWriteString(buf, ":");
|
824
|
+
}
|
825
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
826
|
+
xmlOutputBufferWriteString(buf, ">");
|
827
|
+
}
|
828
|
+
} else {
|
829
|
+
xmlOutputBufferWriteString(buf, ">");
|
830
|
+
if ((format) && (info != NULL) && (!info->isinline) &&
|
831
|
+
(cur->children->type != HTML_TEXT_NODE) &&
|
832
|
+
(cur->children->type != HTML_ENTITY_REF_NODE) &&
|
833
|
+
(cur->children != cur->last) &&
|
834
|
+
(cur->name != NULL) &&
|
835
|
+
(cur->name[0] != 'p')) /* p, pre, param */
|
836
|
+
xmlOutputBufferWriteString(buf, "\n");
|
837
|
+
parent = cur;
|
838
|
+
cur = cur->children;
|
839
|
+
continue;
|
840
|
+
}
|
841
|
+
|
842
|
+
if ((format) && (cur->next != NULL) &&
|
843
|
+
(info != NULL) && (!info->isinline)) {
|
844
|
+
if ((cur->next->type != HTML_TEXT_NODE) &&
|
845
|
+
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
846
|
+
(parent != NULL) &&
|
847
|
+
(parent->name != NULL) &&
|
848
|
+
(parent->name[0] != 'p')) /* p, pre, param */
|
849
|
+
xmlOutputBufferWriteString(buf, "\n");
|
850
|
+
}
|
851
|
+
|
852
|
+
break;
|
853
|
+
|
854
|
+
case XML_ATTRIBUTE_NODE:
|
855
|
+
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
|
856
|
+
break;
|
857
|
+
|
858
|
+
case HTML_TEXT_NODE:
|
859
|
+
if (cur->content == NULL)
|
860
|
+
break;
|
861
|
+
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
862
|
+
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
863
|
+
((parent == NULL) ||
|
864
|
+
((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
|
865
|
+
(xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
|
866
|
+
xmlChar *buffer;
|
867
|
+
|
868
|
+
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
869
|
+
if (buffer != NULL) {
|
870
|
+
xmlOutputBufferWriteString(buf, (const char *)buffer);
|
871
|
+
xmlFree(buffer);
|
872
|
+
}
|
873
|
+
} else {
|
874
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
875
|
+
}
|
876
|
+
break;
|
877
|
+
|
878
|
+
case HTML_COMMENT_NODE:
|
879
|
+
if (cur->content != NULL) {
|
880
|
+
xmlOutputBufferWriteString(buf, "<!--");
|
881
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
882
|
+
xmlOutputBufferWriteString(buf, "-->");
|
883
|
+
}
|
884
|
+
break;
|
885
|
+
|
886
|
+
case HTML_PI_NODE:
|
887
|
+
if (cur->name != NULL) {
|
888
|
+
xmlOutputBufferWriteString(buf, "<?");
|
889
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
890
|
+
if (cur->content != NULL) {
|
891
|
+
xmlOutputBufferWriteString(buf, " ");
|
892
|
+
xmlOutputBufferWriteString(buf,
|
893
|
+
(const char *)cur->content);
|
894
|
+
}
|
895
|
+
xmlOutputBufferWriteString(buf, ">");
|
896
|
+
}
|
897
|
+
break;
|
898
|
+
|
899
|
+
case HTML_ENTITY_REF_NODE:
|
900
|
+
xmlOutputBufferWriteString(buf, "&");
|
901
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
902
|
+
xmlOutputBufferWriteString(buf, ";");
|
903
|
+
break;
|
904
|
+
|
905
|
+
case HTML_PRESERVE_NODE:
|
906
|
+
if (cur->content != NULL) {
|
907
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
908
|
+
}
|
909
|
+
break;
|
910
|
+
|
911
|
+
default:
|
912
|
+
break;
|
913
|
+
}
|
914
|
+
|
915
|
+
while (1) {
|
916
|
+
if (cur == root)
|
917
|
+
return;
|
918
|
+
if (cur->next != NULL) {
|
919
|
+
cur = cur->next;
|
920
|
+
break;
|
921
|
+
}
|
922
|
+
|
923
|
+
cur = parent;
|
924
|
+
/* cur->parent was validated when descending. */
|
925
|
+
parent = cur->parent;
|
926
|
+
|
927
|
+
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
928
|
+
(cur->type == XML_DOCUMENT_NODE)) {
|
929
|
+
xmlOutputBufferWriteString(buf, "\n");
|
930
|
+
} else {
|
931
|
+
if ((format) && (cur->ns == NULL))
|
932
|
+
info = htmlTagLookup(cur->name);
|
933
|
+
else
|
934
|
+
info = NULL;
|
935
|
+
|
936
|
+
if ((format) && (info != NULL) && (!info->isinline) &&
|
937
|
+
(cur->last->type != HTML_TEXT_NODE) &&
|
938
|
+
(cur->last->type != HTML_ENTITY_REF_NODE) &&
|
939
|
+
(cur->children != cur->last) &&
|
940
|
+
(cur->name != NULL) &&
|
941
|
+
(cur->name[0] != 'p')) /* p, pre, param */
|
942
|
+
xmlOutputBufferWriteString(buf, "\n");
|
943
|
+
|
944
|
+
xmlOutputBufferWriteString(buf, "</");
|
945
|
+
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
946
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
947
|
+
xmlOutputBufferWriteString(buf, ":");
|
948
|
+
}
|
949
|
+
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
950
|
+
xmlOutputBufferWriteString(buf, ">");
|
951
|
+
|
952
|
+
if ((format) && (info != NULL) && (!info->isinline) &&
|
953
|
+
(cur->next != NULL)) {
|
954
|
+
if ((cur->next->type != HTML_TEXT_NODE) &&
|
955
|
+
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
956
|
+
(parent != NULL) &&
|
957
|
+
(parent->name != NULL) &&
|
958
|
+
(parent->name[0] != 'p')) /* p, pre, param */
|
959
|
+
xmlOutputBufferWriteString(buf, "\n");
|
960
|
+
}
|
961
|
+
}
|
962
|
+
}
|
963
|
+
}
|
964
|
+
}
|
965
|
+
|
966
|
+
/**
|
967
|
+
* htmlNodeDumpOutput:
|
968
|
+
* @buf: the HTML buffer output
|
969
|
+
* @doc: the document
|
970
|
+
* @cur: the current node
|
971
|
+
* @encoding: the encoding string (unused)
|
972
|
+
*
|
973
|
+
* Dump an HTML node, recursive behaviour,children are printed too,
|
974
|
+
* and formatting returns/spaces are added.
|
975
|
+
*/
|
976
|
+
void
|
977
|
+
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
978
|
+
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
|
979
|
+
htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
|
980
|
+
}
|
981
|
+
|
982
|
+
/**
|
983
|
+
* htmlDocContentDumpFormatOutput:
|
984
|
+
* @buf: the HTML buffer output
|
985
|
+
* @cur: the document
|
986
|
+
* @encoding: the encoding string (unused)
|
987
|
+
* @format: should formatting spaces been added
|
988
|
+
*
|
989
|
+
* Dump an HTML document.
|
990
|
+
*/
|
991
|
+
void
|
992
|
+
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
993
|
+
const char *encoding ATTRIBUTE_UNUSED,
|
994
|
+
int format) {
|
995
|
+
htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
|
996
|
+
}
|
997
|
+
|
998
|
+
/**
|
999
|
+
* htmlDocContentDumpOutput:
|
1000
|
+
* @buf: the HTML buffer output
|
1001
|
+
* @cur: the document
|
1002
|
+
* @encoding: the encoding string (unused)
|
1003
|
+
*
|
1004
|
+
* Dump an HTML document. Formatting return/spaces are added.
|
1005
|
+
*/
|
1006
|
+
void
|
1007
|
+
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
1008
|
+
const char *encoding ATTRIBUTE_UNUSED) {
|
1009
|
+
htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
|
1010
|
+
}
|
1011
|
+
|
1012
|
+
/************************************************************************
|
1013
|
+
* *
|
1014
|
+
* Saving functions front-ends *
|
1015
|
+
* *
|
1016
|
+
************************************************************************/
|
1017
|
+
|
1018
|
+
/**
|
1019
|
+
* htmlDocDump:
|
1020
|
+
* @f: the FILE*
|
1021
|
+
* @cur: the document
|
1022
|
+
*
|
1023
|
+
* Dump an HTML document to an open FILE.
|
1024
|
+
*
|
1025
|
+
* returns: the number of byte written or -1 in case of failure.
|
1026
|
+
*/
|
1027
|
+
int
|
1028
|
+
htmlDocDump(FILE *f, xmlDocPtr cur) {
|
1029
|
+
xmlOutputBufferPtr buf;
|
1030
|
+
xmlCharEncodingHandlerPtr handler = NULL;
|
1031
|
+
const char *encoding;
|
1032
|
+
int ret;
|
1033
|
+
|
1034
|
+
xmlInitParser();
|
1035
|
+
|
1036
|
+
if ((cur == NULL) || (f == NULL)) {
|
1037
|
+
return(-1);
|
1038
|
+
}
|
1039
|
+
|
1040
|
+
encoding = (const char *) htmlGetMetaEncoding(cur);
|
1041
|
+
|
1042
|
+
if (encoding != NULL) {
|
1043
|
+
xmlCharEncoding enc;
|
1044
|
+
|
1045
|
+
enc = xmlParseCharEncoding(encoding);
|
1046
|
+
if (enc != XML_CHAR_ENCODING_UTF8) {
|
1047
|
+
handler = xmlFindCharEncodingHandler(encoding);
|
1048
|
+
if (handler == NULL)
|
1049
|
+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
1050
|
+
}
|
1051
|
+
} else {
|
1052
|
+
/*
|
1053
|
+
* Fallback to HTML or ASCII when the encoding is unspecified
|
1054
|
+
*/
|
1055
|
+
if (handler == NULL)
|
1056
|
+
handler = xmlFindCharEncodingHandler("HTML");
|
1057
|
+
if (handler == NULL)
|
1058
|
+
handler = xmlFindCharEncodingHandler("ascii");
|
1059
|
+
}
|
1060
|
+
|
1061
|
+
buf = xmlOutputBufferCreateFile(f, handler);
|
1062
|
+
if (buf == NULL) return(-1);
|
1063
|
+
htmlDocContentDumpOutput(buf, cur, NULL);
|
1064
|
+
|
1065
|
+
ret = xmlOutputBufferClose(buf);
|
1066
|
+
return(ret);
|
1067
|
+
}
|
1068
|
+
|
1069
|
+
/**
|
1070
|
+
* htmlSaveFile:
|
1071
|
+
* @filename: the filename (or URL)
|
1072
|
+
* @cur: the document
|
1073
|
+
*
|
1074
|
+
* Dump an HTML document to a file. If @filename is "-" the stdout file is
|
1075
|
+
* used.
|
1076
|
+
* returns: the number of byte written or -1 in case of failure.
|
1077
|
+
*/
|
1078
|
+
int
|
1079
|
+
htmlSaveFile(const char *filename, xmlDocPtr cur) {
|
1080
|
+
xmlOutputBufferPtr buf;
|
1081
|
+
xmlCharEncodingHandlerPtr handler = NULL;
|
1082
|
+
const char *encoding;
|
1083
|
+
int ret;
|
1084
|
+
|
1085
|
+
if ((cur == NULL) || (filename == NULL))
|
1086
|
+
return(-1);
|
1087
|
+
|
1088
|
+
xmlInitParser();
|
1089
|
+
|
1090
|
+
encoding = (const char *) htmlGetMetaEncoding(cur);
|
1091
|
+
|
1092
|
+
if (encoding != NULL) {
|
1093
|
+
xmlCharEncoding enc;
|
1094
|
+
|
1095
|
+
enc = xmlParseCharEncoding(encoding);
|
1096
|
+
if (enc != XML_CHAR_ENCODING_UTF8) {
|
1097
|
+
handler = xmlFindCharEncodingHandler(encoding);
|
1098
|
+
if (handler == NULL)
|
1099
|
+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
1100
|
+
}
|
1101
|
+
} else {
|
1102
|
+
/*
|
1103
|
+
* Fallback to HTML or ASCII when the encoding is unspecified
|
1104
|
+
*/
|
1105
|
+
if (handler == NULL)
|
1106
|
+
handler = xmlFindCharEncodingHandler("HTML");
|
1107
|
+
if (handler == NULL)
|
1108
|
+
handler = xmlFindCharEncodingHandler("ascii");
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
/*
|
1112
|
+
* save the content to a temp buffer.
|
1113
|
+
*/
|
1114
|
+
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
|
1115
|
+
if (buf == NULL) return(0);
|
1116
|
+
|
1117
|
+
htmlDocContentDumpOutput(buf, cur, NULL);
|
1118
|
+
|
1119
|
+
ret = xmlOutputBufferClose(buf);
|
1120
|
+
return(ret);
|
1121
|
+
}
|
1122
|
+
|
1123
|
+
/**
|
1124
|
+
* htmlSaveFileFormat:
|
1125
|
+
* @filename: the filename
|
1126
|
+
* @cur: the document
|
1127
|
+
* @format: should formatting spaces been added
|
1128
|
+
* @encoding: the document encoding
|
1129
|
+
*
|
1130
|
+
* Dump an HTML document to a file using a given encoding.
|
1131
|
+
*
|
1132
|
+
* returns: the number of byte written or -1 in case of failure.
|
1133
|
+
*/
|
1134
|
+
int
|
1135
|
+
htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
|
1136
|
+
const char *encoding, int format) {
|
1137
|
+
xmlOutputBufferPtr buf;
|
1138
|
+
xmlCharEncodingHandlerPtr handler = NULL;
|
1139
|
+
int ret;
|
1140
|
+
|
1141
|
+
if ((cur == NULL) || (filename == NULL))
|
1142
|
+
return(-1);
|
1143
|
+
|
1144
|
+
xmlInitParser();
|
1145
|
+
|
1146
|
+
if (encoding != NULL) {
|
1147
|
+
xmlCharEncoding enc;
|
1148
|
+
|
1149
|
+
enc = xmlParseCharEncoding(encoding);
|
1150
|
+
if (enc != XML_CHAR_ENCODING_UTF8) {
|
1151
|
+
handler = xmlFindCharEncodingHandler(encoding);
|
1152
|
+
if (handler == NULL)
|
1153
|
+
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
|
1154
|
+
}
|
1155
|
+
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
|
1156
|
+
} else {
|
1157
|
+
htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
|
1158
|
+
|
1159
|
+
/*
|
1160
|
+
* Fallback to HTML or ASCII when the encoding is unspecified
|
1161
|
+
*/
|
1162
|
+
if (handler == NULL)
|
1163
|
+
handler = xmlFindCharEncodingHandler("HTML");
|
1164
|
+
if (handler == NULL)
|
1165
|
+
handler = xmlFindCharEncodingHandler("ascii");
|
1166
|
+
}
|
1167
|
+
|
1168
|
+
/*
|
1169
|
+
* save the content to a temp buffer.
|
1170
|
+
*/
|
1171
|
+
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
|
1172
|
+
if (buf == NULL) return(0);
|
1173
|
+
|
1174
|
+
htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
|
1175
|
+
|
1176
|
+
ret = xmlOutputBufferClose(buf);
|
1177
|
+
return(ret);
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
/**
|
1181
|
+
* htmlSaveFileEnc:
|
1182
|
+
* @filename: the filename
|
1183
|
+
* @cur: the document
|
1184
|
+
* @encoding: the document encoding
|
1185
|
+
*
|
1186
|
+
* Dump an HTML document to a file using a given encoding
|
1187
|
+
* and formatting returns/spaces are added.
|
1188
|
+
*
|
1189
|
+
* returns: the number of byte written or -1 in case of failure.
|
1190
|
+
*/
|
1191
|
+
int
|
1192
|
+
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
|
1193
|
+
return(htmlSaveFileFormat(filename, cur, encoding, 1));
|
1194
|
+
}
|
1195
|
+
|
1196
|
+
#endif /* LIBXML_OUTPUT_ENABLED */
|
1197
|
+
|
1198
|
+
#define bottom_HTMLtree
|
1199
|
+
#include "elfgcchack.h"
|
1200
|
+
#endif /* LIBXML_HTML_ENABLED */
|