pocxxeci 0.30.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pocxxeci might be problematic. Click here for more details.

Files changed (160) hide show
  1. package/LICENSE +19 -0
  2. package/Makefile +18 -0
  3. package/README.md +52 -0
  4. package/binding.gyp +81 -0
  5. package/index.d.ts +273 -0
  6. package/index.js +45 -0
  7. package/lib/bindings.js +1 -0
  8. package/lib/document.js +122 -0
  9. package/lib/element.js +82 -0
  10. package/lib/sax_parser.js +38 -0
  11. package/package.json +70 -0
  12. package/src/html_document.cc +7 -0
  13. package/src/html_document.h +18 -0
  14. package/src/libxmljs.cc +252 -0
  15. package/src/libxmljs.h +53 -0
  16. package/src/xml_attribute.cc +173 -0
  17. package/src/xml_attribute.h +40 -0
  18. package/src/xml_comment.cc +117 -0
  19. package/src/xml_comment.h +30 -0
  20. package/src/xml_document.cc +810 -0
  21. package/src/xml_document.h +67 -0
  22. package/src/xml_element.cc +565 -0
  23. package/src/xml_element.h +61 -0
  24. package/src/xml_namespace.cc +158 -0
  25. package/src/xml_namespace.h +39 -0
  26. package/src/xml_node.cc +761 -0
  27. package/src/xml_node.h +73 -0
  28. package/src/xml_pi.cc +161 -0
  29. package/src/xml_pi.h +34 -0
  30. package/src/xml_sax_parser.cc +424 -0
  31. package/src/xml_sax_parser.h +73 -0
  32. package/src/xml_syntax_error.cc +66 -0
  33. package/src/xml_syntax_error.h +25 -0
  34. package/src/xml_text.cc +320 -0
  35. package/src/xml_text.h +48 -0
  36. package/src/xml_textwriter.cc +315 -0
  37. package/src/xml_textwriter.h +62 -0
  38. package/src/xml_xpath_context.cc +70 -0
  39. package/src/xml_xpath_context.h +23 -0
  40. package/vendor/libxml/Copyright +23 -0
  41. package/vendor/libxml/DOCBparser.c +305 -0
  42. package/vendor/libxml/HTMLparser.c +7287 -0
  43. package/vendor/libxml/HTMLtree.c +1200 -0
  44. package/vendor/libxml/Makefile +2983 -0
  45. package/vendor/libxml/SAX.c +180 -0
  46. package/vendor/libxml/SAX2.c +3036 -0
  47. package/vendor/libxml/buf.c +1351 -0
  48. package/vendor/libxml/buf.h +72 -0
  49. package/vendor/libxml/c14n.c +2234 -0
  50. package/vendor/libxml/catalog.c +3828 -0
  51. package/vendor/libxml/chvalid.c +336 -0
  52. package/vendor/libxml/config.h +294 -0
  53. package/vendor/libxml/config.h.gch +0 -0
  54. package/vendor/libxml/debugXML.c +3423 -0
  55. package/vendor/libxml/dict.c +1298 -0
  56. package/vendor/libxml/elfgcchack.h +17818 -0
  57. package/vendor/libxml/enc.h +32 -0
  58. package/vendor/libxml/encoding.c +3975 -0
  59. package/vendor/libxml/entities.c +1163 -0
  60. package/vendor/libxml/error.c +998 -0
  61. package/vendor/libxml/globals.c +1126 -0
  62. package/vendor/libxml/hash.c +1146 -0
  63. package/vendor/libxml/include/libxml/DOCBparser.h +96 -0
  64. package/vendor/libxml/include/libxml/HTMLparser.h +306 -0
  65. package/vendor/libxml/include/libxml/HTMLtree.h +147 -0
  66. package/vendor/libxml/include/libxml/Makefile +725 -0
  67. package/vendor/libxml/include/libxml/Makefile.am +54 -0
  68. package/vendor/libxml/include/libxml/Makefile.in +725 -0
  69. package/vendor/libxml/include/libxml/SAX.h +173 -0
  70. package/vendor/libxml/include/libxml/SAX2.h +178 -0
  71. package/vendor/libxml/include/libxml/c14n.h +128 -0
  72. package/vendor/libxml/include/libxml/catalog.h +182 -0
  73. package/vendor/libxml/include/libxml/chvalid.h +230 -0
  74. package/vendor/libxml/include/libxml/debugXML.h +217 -0
  75. package/vendor/libxml/include/libxml/dict.h +79 -0
  76. package/vendor/libxml/include/libxml/encoding.h +245 -0
  77. package/vendor/libxml/include/libxml/entities.h +151 -0
  78. package/vendor/libxml/include/libxml/globals.h +508 -0
  79. package/vendor/libxml/include/libxml/hash.h +236 -0
  80. package/vendor/libxml/include/libxml/list.h +137 -0
  81. package/vendor/libxml/include/libxml/nanoftp.h +163 -0
  82. package/vendor/libxml/include/libxml/nanohttp.h +81 -0
  83. package/vendor/libxml/include/libxml/parser.h +1243 -0
  84. package/vendor/libxml/include/libxml/parserInternals.h +644 -0
  85. package/vendor/libxml/include/libxml/pattern.h +100 -0
  86. package/vendor/libxml/include/libxml/relaxng.h +217 -0
  87. package/vendor/libxml/include/libxml/schemasInternals.h +958 -0
  88. package/vendor/libxml/include/libxml/schematron.h +142 -0
  89. package/vendor/libxml/include/libxml/threads.h +89 -0
  90. package/vendor/libxml/include/libxml/tree.h +1311 -0
  91. package/vendor/libxml/include/libxml/uri.h +94 -0
  92. package/vendor/libxml/include/libxml/valid.h +458 -0
  93. package/vendor/libxml/include/libxml/xinclude.h +129 -0
  94. package/vendor/libxml/include/libxml/xlink.h +189 -0
  95. package/vendor/libxml/include/libxml/xmlIO.h +368 -0
  96. package/vendor/libxml/include/libxml/xmlautomata.h +146 -0
  97. package/vendor/libxml/include/libxml/xmlerror.h +945 -0
  98. package/vendor/libxml/include/libxml/xmlexports.h +77 -0
  99. package/vendor/libxml/include/libxml/xmlmemory.h +224 -0
  100. package/vendor/libxml/include/libxml/xmlmodule.h +57 -0
  101. package/vendor/libxml/include/libxml/xmlreader.h +428 -0
  102. package/vendor/libxml/include/libxml/xmlregexp.h +222 -0
  103. package/vendor/libxml/include/libxml/xmlsave.h +88 -0
  104. package/vendor/libxml/include/libxml/xmlschemas.h +246 -0
  105. package/vendor/libxml/include/libxml/xmlschemastypes.h +151 -0
  106. package/vendor/libxml/include/libxml/xmlstring.h +140 -0
  107. package/vendor/libxml/include/libxml/xmlunicode.h +202 -0
  108. package/vendor/libxml/include/libxml/xmlversion.h +484 -0
  109. package/vendor/libxml/include/libxml/xmlwin32version.h +239 -0
  110. package/vendor/libxml/include/libxml/xmlwriter.h +488 -0
  111. package/vendor/libxml/include/libxml/xpath.h +564 -0
  112. package/vendor/libxml/include/libxml/xpathInternals.h +632 -0
  113. package/vendor/libxml/include/libxml/xpointer.h +114 -0
  114. package/vendor/libxml/include/win32config.h +122 -0
  115. package/vendor/libxml/include/wsockcompat.h +54 -0
  116. package/vendor/libxml/legacy.c +1343 -0
  117. package/vendor/libxml/libxml.h +134 -0
  118. package/vendor/libxml/list.c +779 -0
  119. package/vendor/libxml/nanoftp.c +2118 -0
  120. package/vendor/libxml/nanohttp.c +1899 -0
  121. package/vendor/libxml/parser.c +15553 -0
  122. package/vendor/libxml/parserInternals.c +2164 -0
  123. package/vendor/libxml/pattern.c +2621 -0
  124. package/vendor/libxml/relaxng.c +11101 -0
  125. package/vendor/libxml/rngparser.c +1595 -0
  126. package/vendor/libxml/runsuite.c +1157 -0
  127. package/vendor/libxml/save.h +36 -0
  128. package/vendor/libxml/schematron.c +1787 -0
  129. package/vendor/libxml/threads.c +1049 -0
  130. package/vendor/libxml/timsort.h +601 -0
  131. package/vendor/libxml/tree.c +10183 -0
  132. package/vendor/libxml/trio.c +6895 -0
  133. package/vendor/libxml/trio.h +230 -0
  134. package/vendor/libxml/triodef.h +228 -0
  135. package/vendor/libxml/trionan.c +914 -0
  136. package/vendor/libxml/trionan.h +84 -0
  137. package/vendor/libxml/triop.h +150 -0
  138. package/vendor/libxml/triostr.c +2112 -0
  139. package/vendor/libxml/triostr.h +144 -0
  140. package/vendor/libxml/uri.c +2561 -0
  141. package/vendor/libxml/valid.c +7138 -0
  142. package/vendor/libxml/xinclude.c +2657 -0
  143. package/vendor/libxml/xlink.c +183 -0
  144. package/vendor/libxml/xmlIO.c +4135 -0
  145. package/vendor/libxml/xmlcatalog.c +624 -0
  146. package/vendor/libxml/xmllint.c +3796 -0
  147. package/vendor/libxml/xmlmemory.c +1163 -0
  148. package/vendor/libxml/xmlmodule.c +468 -0
  149. package/vendor/libxml/xmlreader.c +6033 -0
  150. package/vendor/libxml/xmlregexp.c +8271 -0
  151. package/vendor/libxml/xmlsave.c +2735 -0
  152. package/vendor/libxml/xmlschemas.c +29173 -0
  153. package/vendor/libxml/xmlschemastypes.c +6276 -0
  154. package/vendor/libxml/xmlstring.c +1050 -0
  155. package/vendor/libxml/xmlunicode.c +3179 -0
  156. package/vendor/libxml/xmlwriter.c +4738 -0
  157. package/vendor/libxml/xpath.c +14734 -0
  158. package/vendor/libxml/xpointer.c +2969 -0
  159. package/vendor/libxml/xzlib.c +815 -0
  160. package/vendor/libxml/xzlib.h +19 -0
@@ -0,0 +1,1200 @@
1
+ /*
2
+ * HTMLtree.c : implementation of access functions for an HTML tree.
3
+ *
4
+ * See Copyright for the status of this software.
5
+ *
6
+ * daniel@veillard.com
7
+ */
8
+
9
+
10
+ #define IN_LIBXML
11
+ #include "libxml.h"
12
+ #ifdef LIBXML_HTML_ENABLED
13
+
14
+ #include <string.h> /* for memset() only ! */
15
+
16
+ #ifdef HAVE_CTYPE_H
17
+ #include <ctype.h>
18
+ #endif
19
+ #ifdef HAVE_STDLIB_H
20
+ #include <stdlib.h>
21
+ #endif
22
+
23
+ #include <libxml/xmlmemory.h>
24
+ #include <libxml/HTMLparser.h>
25
+ #include <libxml/HTMLtree.h>
26
+ #include <libxml/entities.h>
27
+ #include <libxml/valid.h>
28
+ #include <libxml/xmlerror.h>
29
+ #include <libxml/parserInternals.h>
30
+ #include <libxml/globals.h>
31
+ #include <libxml/uri.h>
32
+
33
+ #include "buf.h"
34
+
35
+ /************************************************************************
36
+ * *
37
+ * Getting/Setting encoding meta tags *
38
+ * *
39
+ ************************************************************************/
40
+
41
+ /**
42
+ * htmlGetMetaEncoding:
43
+ * @doc: the document
44
+ *
45
+ * Encoding definition lookup in the Meta tags
46
+ *
47
+ * Returns the current encoding as flagged in the HTML source
48
+ */
49
+ const xmlChar *
50
+ htmlGetMetaEncoding(htmlDocPtr doc) {
51
+ htmlNodePtr cur;
52
+ const xmlChar *content;
53
+ const xmlChar *encoding;
54
+
55
+ if (doc == NULL)
56
+ return(NULL);
57
+ cur = doc->children;
58
+
59
+ /*
60
+ * Search the html
61
+ */
62
+ while (cur != NULL) {
63
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
64
+ if (xmlStrEqual(cur->name, BAD_CAST"html"))
65
+ break;
66
+ if (xmlStrEqual(cur->name, BAD_CAST"head"))
67
+ goto found_head;
68
+ if (xmlStrEqual(cur->name, BAD_CAST"meta"))
69
+ goto found_meta;
70
+ }
71
+ cur = cur->next;
72
+ }
73
+ if (cur == NULL)
74
+ return(NULL);
75
+ cur = cur->children;
76
+
77
+ /*
78
+ * Search the head
79
+ */
80
+ while (cur != NULL) {
81
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
82
+ if (xmlStrEqual(cur->name, BAD_CAST"head"))
83
+ break;
84
+ if (xmlStrEqual(cur->name, BAD_CAST"meta"))
85
+ goto found_meta;
86
+ }
87
+ cur = cur->next;
88
+ }
89
+ if (cur == NULL)
90
+ return(NULL);
91
+ found_head:
92
+ cur = cur->children;
93
+
94
+ /*
95
+ * Search the meta elements
96
+ */
97
+ found_meta:
98
+ while (cur != NULL) {
99
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
100
+ if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101
+ xmlAttrPtr attr = cur->properties;
102
+ int http;
103
+ const xmlChar *value;
104
+
105
+ content = NULL;
106
+ http = 0;
107
+ while (attr != NULL) {
108
+ if ((attr->children != NULL) &&
109
+ (attr->children->type == XML_TEXT_NODE) &&
110
+ (attr->children->next == NULL)) {
111
+ value = attr->children->content;
112
+ if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113
+ && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
114
+ http = 1;
115
+ else if ((value != NULL)
116
+ && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
117
+ content = value;
118
+ if ((http != 0) && (content != NULL))
119
+ goto found_content;
120
+ }
121
+ attr = attr->next;
122
+ }
123
+ }
124
+ }
125
+ cur = cur->next;
126
+ }
127
+ return(NULL);
128
+
129
+ found_content:
130
+ encoding = xmlStrstr(content, BAD_CAST"charset=");
131
+ if (encoding == NULL)
132
+ encoding = xmlStrstr(content, BAD_CAST"Charset=");
133
+ if (encoding == NULL)
134
+ encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135
+ if (encoding != NULL) {
136
+ encoding += 8;
137
+ } else {
138
+ encoding = xmlStrstr(content, BAD_CAST"charset =");
139
+ if (encoding == NULL)
140
+ encoding = xmlStrstr(content, BAD_CAST"Charset =");
141
+ if (encoding == NULL)
142
+ encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143
+ if (encoding != NULL)
144
+ encoding += 9;
145
+ }
146
+ if (encoding != NULL) {
147
+ while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
148
+ }
149
+ return(encoding);
150
+ }
151
+
152
+ /**
153
+ * htmlSetMetaEncoding:
154
+ * @doc: the document
155
+ * @encoding: the encoding string
156
+ *
157
+ * Sets the current encoding in the Meta tags
158
+ * NOTE: this will not change the document content encoding, just
159
+ * the META flag associated.
160
+ *
161
+ * Returns 0 in case of success and -1 in case of error
162
+ */
163
+ int
164
+ htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
165
+ htmlNodePtr cur, meta = NULL, head = NULL;
166
+ const xmlChar *content = NULL;
167
+ char newcontent[100];
168
+
169
+ newcontent[0] = 0;
170
+
171
+ if (doc == NULL)
172
+ return(-1);
173
+
174
+ /* html isn't a real encoding it's just libxml2 way to get entities */
175
+ if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
176
+ return(-1);
177
+
178
+ if (encoding != NULL) {
179
+ snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180
+ (char *)encoding);
181
+ newcontent[sizeof(newcontent) - 1] = 0;
182
+ }
183
+
184
+ cur = doc->children;
185
+
186
+ /*
187
+ * Search the html
188
+ */
189
+ while (cur != NULL) {
190
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
191
+ if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
192
+ break;
193
+ if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
194
+ goto found_head;
195
+ if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
196
+ goto found_meta;
197
+ }
198
+ cur = cur->next;
199
+ }
200
+ if (cur == NULL)
201
+ return(-1);
202
+ cur = cur->children;
203
+
204
+ /*
205
+ * Search the head
206
+ */
207
+ while (cur != NULL) {
208
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
209
+ if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
210
+ break;
211
+ if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
212
+ head = cur->parent;
213
+ goto found_meta;
214
+ }
215
+ }
216
+ cur = cur->next;
217
+ }
218
+ if (cur == NULL)
219
+ return(-1);
220
+ found_head:
221
+ head = cur;
222
+ if (cur->children == NULL)
223
+ goto create;
224
+ cur = cur->children;
225
+
226
+ found_meta:
227
+ /*
228
+ * Search and update all the remaining the meta elements carrying
229
+ * encoding information
230
+ */
231
+ while (cur != NULL) {
232
+ if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
233
+ if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
234
+ xmlAttrPtr attr = cur->properties;
235
+ int http;
236
+ const xmlChar *value;
237
+
238
+ content = NULL;
239
+ http = 0;
240
+ while (attr != NULL) {
241
+ if ((attr->children != NULL) &&
242
+ (attr->children->type == XML_TEXT_NODE) &&
243
+ (attr->children->next == NULL)) {
244
+ value = attr->children->content;
245
+ if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246
+ && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
247
+ http = 1;
248
+ else
249
+ {
250
+ if ((value != NULL) &&
251
+ (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252
+ content = value;
253
+ }
254
+ if ((http != 0) && (content != NULL))
255
+ break;
256
+ }
257
+ attr = attr->next;
258
+ }
259
+ if ((http != 0) && (content != NULL)) {
260
+ meta = cur;
261
+ break;
262
+ }
263
+
264
+ }
265
+ }
266
+ cur = cur->next;
267
+ }
268
+ create:
269
+ if (meta == NULL) {
270
+ if ((encoding != NULL) && (head != NULL)) {
271
+ /*
272
+ * Create a new Meta element with the right attributes
273
+ */
274
+
275
+ meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
276
+ if (head->children == NULL)
277
+ xmlAddChild(head, meta);
278
+ else
279
+ xmlAddPrevSibling(head->children, meta);
280
+ xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281
+ xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
282
+ }
283
+ } else {
284
+ /* remove the meta tag if NULL is passed */
285
+ if (encoding == NULL) {
286
+ xmlUnlinkNode(meta);
287
+ xmlFreeNode(meta);
288
+ }
289
+ /* change the document only if there is a real encoding change */
290
+ else if (xmlStrcasestr(content, encoding) == NULL) {
291
+ xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
292
+ }
293
+ }
294
+
295
+
296
+ return(0);
297
+ }
298
+
299
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char * const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312
+
313
+
314
+ /**
315
+ * htmlIsBooleanAttr:
316
+ * @name: the name of the attribute to check
317
+ *
318
+ * Determine if a given attribute is a boolean attribute.
319
+ *
320
+ * returns: false if the attribute is not boolean, true otherwise.
321
+ */
322
+ int
323
+ htmlIsBooleanAttr(const xmlChar *name)
324
+ {
325
+ int i = 0;
326
+
327
+ while (htmlBooleanAttrs[i] != NULL) {
328
+ if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
329
+ return 1;
330
+ i++;
331
+ }
332
+ return 0;
333
+ }
334
+
335
+ #ifdef LIBXML_OUTPUT_ENABLED
336
+ /*
337
+ * private routine exported from xmlIO.c
338
+ */
339
+ xmlOutputBufferPtr
340
+ xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
341
+ /************************************************************************
342
+ * *
343
+ * Output error handlers *
344
+ * *
345
+ ************************************************************************/
346
+ /**
347
+ * htmlSaveErrMemory:
348
+ * @extra: extra information
349
+ *
350
+ * Handle an out of memory condition
351
+ */
352
+ static void
353
+ htmlSaveErrMemory(const char *extra)
354
+ {
355
+ __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
356
+ }
357
+
358
+ /**
359
+ * htmlSaveErr:
360
+ * @code: the error number
361
+ * @node: the location of the error.
362
+ * @extra: extra information
363
+ *
364
+ * Handle an out of memory condition
365
+ */
366
+ static void
367
+ htmlSaveErr(int code, xmlNodePtr node, const char *extra)
368
+ {
369
+ const char *msg = NULL;
370
+
371
+ switch(code) {
372
+ case XML_SAVE_NOT_UTF8:
373
+ msg = "string is not in UTF-8\n";
374
+ break;
375
+ case XML_SAVE_CHAR_INVALID:
376
+ msg = "invalid character value\n";
377
+ break;
378
+ case XML_SAVE_UNKNOWN_ENCODING:
379
+ msg = "unknown encoding %s\n";
380
+ break;
381
+ case XML_SAVE_NO_DOCTYPE:
382
+ msg = "HTML has no DOCTYPE\n";
383
+ break;
384
+ default:
385
+ msg = "unexpected error number\n";
386
+ }
387
+ __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
388
+ }
389
+
390
+ /************************************************************************
391
+ * *
392
+ * Dumping HTML tree content to a simple buffer *
393
+ * *
394
+ ************************************************************************/
395
+
396
+ /**
397
+ * htmlBufNodeDumpFormat:
398
+ * @buf: the xmlBufPtr output
399
+ * @doc: the document
400
+ * @cur: the current node
401
+ * @format: should formatting spaces been added
402
+ *
403
+ * Dump an HTML node, recursive behaviour,children are printed too.
404
+ *
405
+ * Returns the number of byte written or -1 in case of error
406
+ */
407
+ static size_t
408
+ htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
409
+ int format) {
410
+ size_t use;
411
+ int ret;
412
+ xmlOutputBufferPtr outbuf;
413
+
414
+ if (cur == NULL) {
415
+ return (-1);
416
+ }
417
+ if (buf == NULL) {
418
+ return (-1);
419
+ }
420
+ outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421
+ if (outbuf == NULL) {
422
+ htmlSaveErrMemory("allocating HTML output buffer");
423
+ return (-1);
424
+ }
425
+ memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426
+ outbuf->buffer = buf;
427
+ outbuf->encoder = NULL;
428
+ outbuf->writecallback = NULL;
429
+ outbuf->closecallback = NULL;
430
+ outbuf->context = NULL;
431
+ outbuf->written = 0;
432
+
433
+ use = xmlBufUse(buf);
434
+ htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
435
+ xmlFree(outbuf);
436
+ ret = xmlBufUse(buf) - use;
437
+ return (ret);
438
+ }
439
+
440
+ /**
441
+ * htmlNodeDump:
442
+ * @buf: the HTML buffer output
443
+ * @doc: the document
444
+ * @cur: the current node
445
+ *
446
+ * Dump an HTML node, recursive behaviour,children are printed too,
447
+ * and formatting returns are added.
448
+ *
449
+ * Returns the number of byte written or -1 in case of error
450
+ */
451
+ int
452
+ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
453
+ xmlBufPtr buffer;
454
+ size_t ret;
455
+
456
+ if ((buf == NULL) || (cur == NULL))
457
+ return(-1);
458
+
459
+ xmlInitParser();
460
+ buffer = xmlBufFromBuffer(buf);
461
+ if (buffer == NULL)
462
+ return(-1);
463
+
464
+ ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
465
+
466
+ xmlBufBackToBuffer(buffer);
467
+
468
+ if (ret > INT_MAX)
469
+ return(-1);
470
+ return((int) ret);
471
+ }
472
+
473
+ /**
474
+ * htmlNodeDumpFileFormat:
475
+ * @out: the FILE pointer
476
+ * @doc: the document
477
+ * @cur: the current node
478
+ * @encoding: the document encoding
479
+ * @format: should formatting spaces been added
480
+ *
481
+ * Dump an HTML node, recursive behaviour,children are printed too.
482
+ *
483
+ * TODO: if encoding == NULL try to save in the doc encoding
484
+ *
485
+ * returns: the number of byte written or -1 in case of failure.
486
+ */
487
+ int
488
+ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489
+ xmlNodePtr cur, const char *encoding, int format) {
490
+ xmlOutputBufferPtr buf;
491
+ xmlCharEncodingHandlerPtr handler = NULL;
492
+ int ret;
493
+
494
+ xmlInitParser();
495
+
496
+ if (encoding != NULL) {
497
+ xmlCharEncoding enc;
498
+
499
+ enc = xmlParseCharEncoding(encoding);
500
+ if (enc != XML_CHAR_ENCODING_UTF8) {
501
+ handler = xmlFindCharEncodingHandler(encoding);
502
+ if (handler == NULL)
503
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
504
+ }
505
+ } else {
506
+ /*
507
+ * Fallback to HTML or ASCII when the encoding is unspecified
508
+ */
509
+ if (handler == NULL)
510
+ handler = xmlFindCharEncodingHandler("HTML");
511
+ if (handler == NULL)
512
+ handler = xmlFindCharEncodingHandler("ascii");
513
+ }
514
+
515
+ /*
516
+ * save the content to a temp buffer.
517
+ */
518
+ buf = xmlOutputBufferCreateFile(out, handler);
519
+ if (buf == NULL) return(0);
520
+
521
+ htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
522
+
523
+ ret = xmlOutputBufferClose(buf);
524
+ return(ret);
525
+ }
526
+
527
+ /**
528
+ * htmlNodeDumpFile:
529
+ * @out: the FILE pointer
530
+ * @doc: the document
531
+ * @cur: the current node
532
+ *
533
+ * Dump an HTML node, recursive behaviour,children are printed too,
534
+ * and formatting returns are added.
535
+ */
536
+ void
537
+ htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
538
+ htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
539
+ }
540
+
541
+ /**
542
+ * htmlDocDumpMemoryFormat:
543
+ * @cur: the document
544
+ * @mem: OUT: the memory pointer
545
+ * @size: OUT: the memory length
546
+ * @format: should formatting spaces been added
547
+ *
548
+ * Dump an HTML document in memory and return the xmlChar * and it's size.
549
+ * It's up to the caller to free the memory.
550
+ */
551
+ void
552
+ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
553
+ xmlOutputBufferPtr buf;
554
+ xmlCharEncodingHandlerPtr handler = NULL;
555
+ const char *encoding;
556
+
557
+ xmlInitParser();
558
+
559
+ if ((mem == NULL) || (size == NULL))
560
+ return;
561
+ if (cur == NULL) {
562
+ *mem = NULL;
563
+ *size = 0;
564
+ return;
565
+ }
566
+
567
+ encoding = (const char *) htmlGetMetaEncoding(cur);
568
+
569
+ if (encoding != NULL) {
570
+ xmlCharEncoding enc;
571
+
572
+ enc = xmlParseCharEncoding(encoding);
573
+ if (enc != XML_CHAR_ENCODING_UTF8) {
574
+ handler = xmlFindCharEncodingHandler(encoding);
575
+ if (handler == NULL)
576
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
577
+
578
+ }
579
+ } else {
580
+ /*
581
+ * Fallback to HTML or ASCII when the encoding is unspecified
582
+ */
583
+ if (handler == NULL)
584
+ handler = xmlFindCharEncodingHandler("HTML");
585
+ if (handler == NULL)
586
+ handler = xmlFindCharEncodingHandler("ascii");
587
+ }
588
+
589
+ buf = xmlAllocOutputBufferInternal(handler);
590
+ if (buf == NULL) {
591
+ *mem = NULL;
592
+ *size = 0;
593
+ return;
594
+ }
595
+
596
+ htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
597
+
598
+ xmlOutputBufferFlush(buf);
599
+ if (buf->conv != NULL) {
600
+ *size = xmlBufUse(buf->conv);
601
+ *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
602
+ } else {
603
+ *size = xmlBufUse(buf->buffer);
604
+ *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
605
+ }
606
+ (void)xmlOutputBufferClose(buf);
607
+ }
608
+
609
+ /**
610
+ * htmlDocDumpMemory:
611
+ * @cur: the document
612
+ * @mem: OUT: the memory pointer
613
+ * @size: OUT: the memory length
614
+ *
615
+ * Dump an HTML document in memory and return the xmlChar * and it's size.
616
+ * It's up to the caller to free the memory.
617
+ */
618
+ void
619
+ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
620
+ htmlDocDumpMemoryFormat(cur, mem, size, 1);
621
+ }
622
+
623
+
624
+ /************************************************************************
625
+ * *
626
+ * Dumping HTML tree content to an I/O output buffer *
627
+ * *
628
+ ************************************************************************/
629
+
630
+ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
631
+
632
+ /**
633
+ * htmlDtdDumpOutput:
634
+ * @buf: the HTML buffer output
635
+ * @doc: the document
636
+ * @encoding: the encoding string
637
+ *
638
+ * TODO: check whether encoding is needed
639
+ *
640
+ * Dump the HTML document DTD, if any.
641
+ */
642
+ static void
643
+ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
644
+ const char *encoding ATTRIBUTE_UNUSED) {
645
+ xmlDtdPtr cur = doc->intSubset;
646
+
647
+ if (cur == NULL) {
648
+ htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
649
+ return;
650
+ }
651
+ xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
652
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
653
+ if (cur->ExternalID != NULL) {
654
+ xmlOutputBufferWriteString(buf, " PUBLIC ");
655
+ xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
656
+ if (cur->SystemID != NULL) {
657
+ xmlOutputBufferWriteString(buf, " ");
658
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
659
+ }
660
+ } else if (cur->SystemID != NULL &&
661
+ xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
662
+ xmlOutputBufferWriteString(buf, " SYSTEM ");
663
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
664
+ }
665
+ xmlOutputBufferWriteString(buf, ">\n");
666
+ }
667
+
668
+ /**
669
+ * htmlAttrDumpOutput:
670
+ * @buf: the HTML buffer output
671
+ * @doc: the document
672
+ * @cur: the attribute pointer
673
+ *
674
+ * Dump an HTML attribute
675
+ */
676
+ static void
677
+ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
678
+ xmlChar *value;
679
+
680
+ /*
681
+ * The html output method should not escape a & character
682
+ * occurring in an attribute value immediately followed by
683
+ * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
684
+ * This is implemented in xmlEncodeEntitiesReentrant
685
+ */
686
+
687
+ if (cur == NULL) {
688
+ return;
689
+ }
690
+ xmlOutputBufferWriteString(buf, " ");
691
+ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
692
+ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
693
+ xmlOutputBufferWriteString(buf, ":");
694
+ }
695
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
696
+ if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
697
+ value = xmlNodeListGetString(doc, cur->children, 0);
698
+ if (value) {
699
+ xmlOutputBufferWriteString(buf, "=");
700
+ if ((cur->ns == NULL) && (cur->parent != NULL) &&
701
+ (cur->parent->ns == NULL) &&
702
+ ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
703
+ (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
704
+ (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
705
+ ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
706
+ (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
707
+ xmlChar *escaped;
708
+ xmlChar *tmp = value;
709
+
710
+ while (IS_BLANK_CH(*tmp)) tmp++;
711
+
712
+ /*
713
+ * the < and > have already been escaped at the entity level
714
+ * And doing so here breaks server side includes
715
+ */
716
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
717
+ if (escaped != NULL) {
718
+ xmlBufWriteQuotedString(buf->buffer, escaped);
719
+ xmlFree(escaped);
720
+ } else {
721
+ xmlBufWriteQuotedString(buf->buffer, value);
722
+ }
723
+ } else {
724
+ xmlBufWriteQuotedString(buf->buffer, value);
725
+ }
726
+ xmlFree(value);
727
+ } else {
728
+ xmlOutputBufferWriteString(buf, "=\"\"");
729
+ }
730
+ }
731
+ }
732
+
733
+ /**
734
+ * htmlNodeDumpFormatOutput:
735
+ * @buf: the HTML buffer output
736
+ * @doc: the document
737
+ * @cur: the current node
738
+ * @encoding: the encoding string (unused)
739
+ * @format: should formatting spaces been added
740
+ *
741
+ * Dump an HTML node, recursive behaviour,children are printed too.
742
+ */
743
+ void
744
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
745
+ xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
746
+ int format) {
747
+ xmlNodePtr root, parent;
748
+ xmlAttrPtr attr;
749
+ const htmlElemDesc * info;
750
+
751
+ xmlInitParser();
752
+
753
+ if ((cur == NULL) || (buf == NULL)) {
754
+ return;
755
+ }
756
+
757
+ root = cur;
758
+ parent = cur->parent;
759
+ while (1) {
760
+ switch (cur->type) {
761
+ case XML_HTML_DOCUMENT_NODE:
762
+ case XML_DOCUMENT_NODE:
763
+ if (((xmlDocPtr) cur)->intSubset != NULL) {
764
+ htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
765
+ }
766
+ if (cur->children != NULL) {
767
+ /* Always validate cur->parent when descending. */
768
+ if (cur->parent == parent) {
769
+ parent = cur;
770
+ cur = cur->children;
771
+ continue;
772
+ }
773
+ } else {
774
+ xmlOutputBufferWriteString(buf, "\n");
775
+ }
776
+ break;
777
+
778
+ case XML_ELEMENT_NODE:
779
+ /*
780
+ * Some users like lxml are known to pass nodes with a corrupted
781
+ * tree structure. Fall back to a recursive call to handle this
782
+ * case.
783
+ */
784
+ if ((cur->parent != parent) && (cur->children != NULL)) {
785
+ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
786
+ break;
787
+ }
788
+
789
+ /*
790
+ * Get specific HTML info for that node.
791
+ */
792
+ if (cur->ns == NULL)
793
+ info = htmlTagLookup(cur->name);
794
+ else
795
+ info = NULL;
796
+
797
+ xmlOutputBufferWriteString(buf, "<");
798
+ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
799
+ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
800
+ xmlOutputBufferWriteString(buf, ":");
801
+ }
802
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
803
+ if (cur->nsDef)
804
+ xmlNsListDumpOutput(buf, cur->nsDef);
805
+ attr = cur->properties;
806
+ while (attr != NULL) {
807
+ htmlAttrDumpOutput(buf, doc, attr);
808
+ attr = attr->next;
809
+ }
810
+
811
+ if ((info != NULL) && (info->empty)) {
812
+ xmlOutputBufferWriteString(buf, ">");
813
+ } else if (cur->children == NULL) {
814
+ if ((info != NULL) && (info->saveEndTag != 0) &&
815
+ (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
816
+ (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
817
+ xmlOutputBufferWriteString(buf, ">");
818
+ } else {
819
+ xmlOutputBufferWriteString(buf, "></");
820
+ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
821
+ xmlOutputBufferWriteString(buf,
822
+ (const char *)cur->ns->prefix);
823
+ xmlOutputBufferWriteString(buf, ":");
824
+ }
825
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
826
+ xmlOutputBufferWriteString(buf, ">");
827
+ }
828
+ } else {
829
+ xmlOutputBufferWriteString(buf, ">");
830
+ if ((format) && (info != NULL) && (!info->isinline) &&
831
+ (cur->children->type != HTML_TEXT_NODE) &&
832
+ (cur->children->type != HTML_ENTITY_REF_NODE) &&
833
+ (cur->children != cur->last) &&
834
+ (cur->name != NULL) &&
835
+ (cur->name[0] != 'p')) /* p, pre, param */
836
+ xmlOutputBufferWriteString(buf, "\n");
837
+ parent = cur;
838
+ cur = cur->children;
839
+ continue;
840
+ }
841
+
842
+ if ((format) && (cur->next != NULL) &&
843
+ (info != NULL) && (!info->isinline)) {
844
+ if ((cur->next->type != HTML_TEXT_NODE) &&
845
+ (cur->next->type != HTML_ENTITY_REF_NODE) &&
846
+ (parent != NULL) &&
847
+ (parent->name != NULL) &&
848
+ (parent->name[0] != 'p')) /* p, pre, param */
849
+ xmlOutputBufferWriteString(buf, "\n");
850
+ }
851
+
852
+ break;
853
+
854
+ case XML_ATTRIBUTE_NODE:
855
+ htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
856
+ break;
857
+
858
+ case HTML_TEXT_NODE:
859
+ if (cur->content == NULL)
860
+ break;
861
+ if (((cur->name == (const xmlChar *)xmlStringText) ||
862
+ (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
863
+ ((parent == NULL) ||
864
+ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
865
+ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
866
+ xmlChar *buffer;
867
+
868
+ buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
869
+ if (buffer != NULL) {
870
+ xmlOutputBufferWriteString(buf, (const char *)buffer);
871
+ xmlFree(buffer);
872
+ }
873
+ } else {
874
+ xmlOutputBufferWriteString(buf, (const char *)cur->content);
875
+ }
876
+ break;
877
+
878
+ case HTML_COMMENT_NODE:
879
+ if (cur->content != NULL) {
880
+ xmlOutputBufferWriteString(buf, "<!--");
881
+ xmlOutputBufferWriteString(buf, (const char *)cur->content);
882
+ xmlOutputBufferWriteString(buf, "-->");
883
+ }
884
+ break;
885
+
886
+ case HTML_PI_NODE:
887
+ if (cur->name != NULL) {
888
+ xmlOutputBufferWriteString(buf, "<?");
889
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
890
+ if (cur->content != NULL) {
891
+ xmlOutputBufferWriteString(buf, " ");
892
+ xmlOutputBufferWriteString(buf,
893
+ (const char *)cur->content);
894
+ }
895
+ xmlOutputBufferWriteString(buf, ">");
896
+ }
897
+ break;
898
+
899
+ case HTML_ENTITY_REF_NODE:
900
+ xmlOutputBufferWriteString(buf, "&");
901
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
902
+ xmlOutputBufferWriteString(buf, ";");
903
+ break;
904
+
905
+ case HTML_PRESERVE_NODE:
906
+ if (cur->content != NULL) {
907
+ xmlOutputBufferWriteString(buf, (const char *)cur->content);
908
+ }
909
+ break;
910
+
911
+ default:
912
+ break;
913
+ }
914
+
915
+ while (1) {
916
+ if (cur == root)
917
+ return;
918
+ if (cur->next != NULL) {
919
+ cur = cur->next;
920
+ break;
921
+ }
922
+
923
+ cur = parent;
924
+ /* cur->parent was validated when descending. */
925
+ parent = cur->parent;
926
+
927
+ if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
928
+ (cur->type == XML_DOCUMENT_NODE)) {
929
+ xmlOutputBufferWriteString(buf, "\n");
930
+ } else {
931
+ if ((format) && (cur->ns == NULL))
932
+ info = htmlTagLookup(cur->name);
933
+ else
934
+ info = NULL;
935
+
936
+ if ((format) && (info != NULL) && (!info->isinline) &&
937
+ (cur->last->type != HTML_TEXT_NODE) &&
938
+ (cur->last->type != HTML_ENTITY_REF_NODE) &&
939
+ (cur->children != cur->last) &&
940
+ (cur->name != NULL) &&
941
+ (cur->name[0] != 'p')) /* p, pre, param */
942
+ xmlOutputBufferWriteString(buf, "\n");
943
+
944
+ xmlOutputBufferWriteString(buf, "</");
945
+ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
946
+ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
947
+ xmlOutputBufferWriteString(buf, ":");
948
+ }
949
+ xmlOutputBufferWriteString(buf, (const char *)cur->name);
950
+ xmlOutputBufferWriteString(buf, ">");
951
+
952
+ if ((format) && (info != NULL) && (!info->isinline) &&
953
+ (cur->next != NULL)) {
954
+ if ((cur->next->type != HTML_TEXT_NODE) &&
955
+ (cur->next->type != HTML_ENTITY_REF_NODE) &&
956
+ (parent != NULL) &&
957
+ (parent->name != NULL) &&
958
+ (parent->name[0] != 'p')) /* p, pre, param */
959
+ xmlOutputBufferWriteString(buf, "\n");
960
+ }
961
+ }
962
+ }
963
+ }
964
+ }
965
+
966
+ /**
967
+ * htmlNodeDumpOutput:
968
+ * @buf: the HTML buffer output
969
+ * @doc: the document
970
+ * @cur: the current node
971
+ * @encoding: the encoding string (unused)
972
+ *
973
+ * Dump an HTML node, recursive behaviour,children are printed too,
974
+ * and formatting returns/spaces are added.
975
+ */
976
+ void
977
+ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
978
+ xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
979
+ htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
980
+ }
981
+
982
+ /**
983
+ * htmlDocContentDumpFormatOutput:
984
+ * @buf: the HTML buffer output
985
+ * @cur: the document
986
+ * @encoding: the encoding string (unused)
987
+ * @format: should formatting spaces been added
988
+ *
989
+ * Dump an HTML document.
990
+ */
991
+ void
992
+ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
993
+ const char *encoding ATTRIBUTE_UNUSED,
994
+ int format) {
995
+ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
996
+ }
997
+
998
+ /**
999
+ * htmlDocContentDumpOutput:
1000
+ * @buf: the HTML buffer output
1001
+ * @cur: the document
1002
+ * @encoding: the encoding string (unused)
1003
+ *
1004
+ * Dump an HTML document. Formatting return/spaces are added.
1005
+ */
1006
+ void
1007
+ htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1008
+ const char *encoding ATTRIBUTE_UNUSED) {
1009
+ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
1010
+ }
1011
+
1012
+ /************************************************************************
1013
+ * *
1014
+ * Saving functions front-ends *
1015
+ * *
1016
+ ************************************************************************/
1017
+
1018
+ /**
1019
+ * htmlDocDump:
1020
+ * @f: the FILE*
1021
+ * @cur: the document
1022
+ *
1023
+ * Dump an HTML document to an open FILE.
1024
+ *
1025
+ * returns: the number of byte written or -1 in case of failure.
1026
+ */
1027
+ int
1028
+ htmlDocDump(FILE *f, xmlDocPtr cur) {
1029
+ xmlOutputBufferPtr buf;
1030
+ xmlCharEncodingHandlerPtr handler = NULL;
1031
+ const char *encoding;
1032
+ int ret;
1033
+
1034
+ xmlInitParser();
1035
+
1036
+ if ((cur == NULL) || (f == NULL)) {
1037
+ return(-1);
1038
+ }
1039
+
1040
+ encoding = (const char *) htmlGetMetaEncoding(cur);
1041
+
1042
+ if (encoding != NULL) {
1043
+ xmlCharEncoding enc;
1044
+
1045
+ enc = xmlParseCharEncoding(encoding);
1046
+ if (enc != XML_CHAR_ENCODING_UTF8) {
1047
+ handler = xmlFindCharEncodingHandler(encoding);
1048
+ if (handler == NULL)
1049
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1050
+ }
1051
+ } else {
1052
+ /*
1053
+ * Fallback to HTML or ASCII when the encoding is unspecified
1054
+ */
1055
+ if (handler == NULL)
1056
+ handler = xmlFindCharEncodingHandler("HTML");
1057
+ if (handler == NULL)
1058
+ handler = xmlFindCharEncodingHandler("ascii");
1059
+ }
1060
+
1061
+ buf = xmlOutputBufferCreateFile(f, handler);
1062
+ if (buf == NULL) return(-1);
1063
+ htmlDocContentDumpOutput(buf, cur, NULL);
1064
+
1065
+ ret = xmlOutputBufferClose(buf);
1066
+ return(ret);
1067
+ }
1068
+
1069
+ /**
1070
+ * htmlSaveFile:
1071
+ * @filename: the filename (or URL)
1072
+ * @cur: the document
1073
+ *
1074
+ * Dump an HTML document to a file. If @filename is "-" the stdout file is
1075
+ * used.
1076
+ * returns: the number of byte written or -1 in case of failure.
1077
+ */
1078
+ int
1079
+ htmlSaveFile(const char *filename, xmlDocPtr cur) {
1080
+ xmlOutputBufferPtr buf;
1081
+ xmlCharEncodingHandlerPtr handler = NULL;
1082
+ const char *encoding;
1083
+ int ret;
1084
+
1085
+ if ((cur == NULL) || (filename == NULL))
1086
+ return(-1);
1087
+
1088
+ xmlInitParser();
1089
+
1090
+ encoding = (const char *) htmlGetMetaEncoding(cur);
1091
+
1092
+ if (encoding != NULL) {
1093
+ xmlCharEncoding enc;
1094
+
1095
+ enc = xmlParseCharEncoding(encoding);
1096
+ if (enc != XML_CHAR_ENCODING_UTF8) {
1097
+ handler = xmlFindCharEncodingHandler(encoding);
1098
+ if (handler == NULL)
1099
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1100
+ }
1101
+ } else {
1102
+ /*
1103
+ * Fallback to HTML or ASCII when the encoding is unspecified
1104
+ */
1105
+ if (handler == NULL)
1106
+ handler = xmlFindCharEncodingHandler("HTML");
1107
+ if (handler == NULL)
1108
+ handler = xmlFindCharEncodingHandler("ascii");
1109
+ }
1110
+
1111
+ /*
1112
+ * save the content to a temp buffer.
1113
+ */
1114
+ buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1115
+ if (buf == NULL) return(0);
1116
+
1117
+ htmlDocContentDumpOutput(buf, cur, NULL);
1118
+
1119
+ ret = xmlOutputBufferClose(buf);
1120
+ return(ret);
1121
+ }
1122
+
1123
+ /**
1124
+ * htmlSaveFileFormat:
1125
+ * @filename: the filename
1126
+ * @cur: the document
1127
+ * @format: should formatting spaces been added
1128
+ * @encoding: the document encoding
1129
+ *
1130
+ * Dump an HTML document to a file using a given encoding.
1131
+ *
1132
+ * returns: the number of byte written or -1 in case of failure.
1133
+ */
1134
+ int
1135
+ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1136
+ const char *encoding, int format) {
1137
+ xmlOutputBufferPtr buf;
1138
+ xmlCharEncodingHandlerPtr handler = NULL;
1139
+ int ret;
1140
+
1141
+ if ((cur == NULL) || (filename == NULL))
1142
+ return(-1);
1143
+
1144
+ xmlInitParser();
1145
+
1146
+ if (encoding != NULL) {
1147
+ xmlCharEncoding enc;
1148
+
1149
+ enc = xmlParseCharEncoding(encoding);
1150
+ if (enc != XML_CHAR_ENCODING_UTF8) {
1151
+ handler = xmlFindCharEncodingHandler(encoding);
1152
+ if (handler == NULL)
1153
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1154
+ }
1155
+ htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1156
+ } else {
1157
+ htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1158
+
1159
+ /*
1160
+ * Fallback to HTML or ASCII when the encoding is unspecified
1161
+ */
1162
+ if (handler == NULL)
1163
+ handler = xmlFindCharEncodingHandler("HTML");
1164
+ if (handler == NULL)
1165
+ handler = xmlFindCharEncodingHandler("ascii");
1166
+ }
1167
+
1168
+ /*
1169
+ * save the content to a temp buffer.
1170
+ */
1171
+ buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1172
+ if (buf == NULL) return(0);
1173
+
1174
+ htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1175
+
1176
+ ret = xmlOutputBufferClose(buf);
1177
+ return(ret);
1178
+ }
1179
+
1180
+ /**
1181
+ * htmlSaveFileEnc:
1182
+ * @filename: the filename
1183
+ * @cur: the document
1184
+ * @encoding: the document encoding
1185
+ *
1186
+ * Dump an HTML document to a file using a given encoding
1187
+ * and formatting returns/spaces are added.
1188
+ *
1189
+ * returns: the number of byte written or -1 in case of failure.
1190
+ */
1191
+ int
1192
+ htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1193
+ return(htmlSaveFileFormat(filename, cur, encoding, 1));
1194
+ }
1195
+
1196
+ #endif /* LIBXML_OUTPUT_ENABLED */
1197
+
1198
+ #define bottom_HTMLtree
1199
+ #include "elfgcchack.h"
1200
+ #endif /* LIBXML_HTML_ENABLED */