pocxxeci 0.30.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pocxxeci might be problematic. Click here for more details.

Files changed (160) hide show
  1. package/LICENSE +19 -0
  2. package/Makefile +18 -0
  3. package/README.md +52 -0
  4. package/binding.gyp +81 -0
  5. package/index.d.ts +273 -0
  6. package/index.js +45 -0
  7. package/lib/bindings.js +1 -0
  8. package/lib/document.js +122 -0
  9. package/lib/element.js +82 -0
  10. package/lib/sax_parser.js +38 -0
  11. package/package.json +70 -0
  12. package/src/html_document.cc +7 -0
  13. package/src/html_document.h +18 -0
  14. package/src/libxmljs.cc +252 -0
  15. package/src/libxmljs.h +53 -0
  16. package/src/xml_attribute.cc +173 -0
  17. package/src/xml_attribute.h +40 -0
  18. package/src/xml_comment.cc +117 -0
  19. package/src/xml_comment.h +30 -0
  20. package/src/xml_document.cc +810 -0
  21. package/src/xml_document.h +67 -0
  22. package/src/xml_element.cc +565 -0
  23. package/src/xml_element.h +61 -0
  24. package/src/xml_namespace.cc +158 -0
  25. package/src/xml_namespace.h +39 -0
  26. package/src/xml_node.cc +761 -0
  27. package/src/xml_node.h +73 -0
  28. package/src/xml_pi.cc +161 -0
  29. package/src/xml_pi.h +34 -0
  30. package/src/xml_sax_parser.cc +424 -0
  31. package/src/xml_sax_parser.h +73 -0
  32. package/src/xml_syntax_error.cc +66 -0
  33. package/src/xml_syntax_error.h +25 -0
  34. package/src/xml_text.cc +320 -0
  35. package/src/xml_text.h +48 -0
  36. package/src/xml_textwriter.cc +315 -0
  37. package/src/xml_textwriter.h +62 -0
  38. package/src/xml_xpath_context.cc +70 -0
  39. package/src/xml_xpath_context.h +23 -0
  40. package/vendor/libxml/Copyright +23 -0
  41. package/vendor/libxml/DOCBparser.c +305 -0
  42. package/vendor/libxml/HTMLparser.c +7287 -0
  43. package/vendor/libxml/HTMLtree.c +1200 -0
  44. package/vendor/libxml/Makefile +2983 -0
  45. package/vendor/libxml/SAX.c +180 -0
  46. package/vendor/libxml/SAX2.c +3036 -0
  47. package/vendor/libxml/buf.c +1351 -0
  48. package/vendor/libxml/buf.h +72 -0
  49. package/vendor/libxml/c14n.c +2234 -0
  50. package/vendor/libxml/catalog.c +3828 -0
  51. package/vendor/libxml/chvalid.c +336 -0
  52. package/vendor/libxml/config.h +294 -0
  53. package/vendor/libxml/config.h.gch +0 -0
  54. package/vendor/libxml/debugXML.c +3423 -0
  55. package/vendor/libxml/dict.c +1298 -0
  56. package/vendor/libxml/elfgcchack.h +17818 -0
  57. package/vendor/libxml/enc.h +32 -0
  58. package/vendor/libxml/encoding.c +3975 -0
  59. package/vendor/libxml/entities.c +1163 -0
  60. package/vendor/libxml/error.c +998 -0
  61. package/vendor/libxml/globals.c +1126 -0
  62. package/vendor/libxml/hash.c +1146 -0
  63. package/vendor/libxml/include/libxml/DOCBparser.h +96 -0
  64. package/vendor/libxml/include/libxml/HTMLparser.h +306 -0
  65. package/vendor/libxml/include/libxml/HTMLtree.h +147 -0
  66. package/vendor/libxml/include/libxml/Makefile +725 -0
  67. package/vendor/libxml/include/libxml/Makefile.am +54 -0
  68. package/vendor/libxml/include/libxml/Makefile.in +725 -0
  69. package/vendor/libxml/include/libxml/SAX.h +173 -0
  70. package/vendor/libxml/include/libxml/SAX2.h +178 -0
  71. package/vendor/libxml/include/libxml/c14n.h +128 -0
  72. package/vendor/libxml/include/libxml/catalog.h +182 -0
  73. package/vendor/libxml/include/libxml/chvalid.h +230 -0
  74. package/vendor/libxml/include/libxml/debugXML.h +217 -0
  75. package/vendor/libxml/include/libxml/dict.h +79 -0
  76. package/vendor/libxml/include/libxml/encoding.h +245 -0
  77. package/vendor/libxml/include/libxml/entities.h +151 -0
  78. package/vendor/libxml/include/libxml/globals.h +508 -0
  79. package/vendor/libxml/include/libxml/hash.h +236 -0
  80. package/vendor/libxml/include/libxml/list.h +137 -0
  81. package/vendor/libxml/include/libxml/nanoftp.h +163 -0
  82. package/vendor/libxml/include/libxml/nanohttp.h +81 -0
  83. package/vendor/libxml/include/libxml/parser.h +1243 -0
  84. package/vendor/libxml/include/libxml/parserInternals.h +644 -0
  85. package/vendor/libxml/include/libxml/pattern.h +100 -0
  86. package/vendor/libxml/include/libxml/relaxng.h +217 -0
  87. package/vendor/libxml/include/libxml/schemasInternals.h +958 -0
  88. package/vendor/libxml/include/libxml/schematron.h +142 -0
  89. package/vendor/libxml/include/libxml/threads.h +89 -0
  90. package/vendor/libxml/include/libxml/tree.h +1311 -0
  91. package/vendor/libxml/include/libxml/uri.h +94 -0
  92. package/vendor/libxml/include/libxml/valid.h +458 -0
  93. package/vendor/libxml/include/libxml/xinclude.h +129 -0
  94. package/vendor/libxml/include/libxml/xlink.h +189 -0
  95. package/vendor/libxml/include/libxml/xmlIO.h +368 -0
  96. package/vendor/libxml/include/libxml/xmlautomata.h +146 -0
  97. package/vendor/libxml/include/libxml/xmlerror.h +945 -0
  98. package/vendor/libxml/include/libxml/xmlexports.h +77 -0
  99. package/vendor/libxml/include/libxml/xmlmemory.h +224 -0
  100. package/vendor/libxml/include/libxml/xmlmodule.h +57 -0
  101. package/vendor/libxml/include/libxml/xmlreader.h +428 -0
  102. package/vendor/libxml/include/libxml/xmlregexp.h +222 -0
  103. package/vendor/libxml/include/libxml/xmlsave.h +88 -0
  104. package/vendor/libxml/include/libxml/xmlschemas.h +246 -0
  105. package/vendor/libxml/include/libxml/xmlschemastypes.h +151 -0
  106. package/vendor/libxml/include/libxml/xmlstring.h +140 -0
  107. package/vendor/libxml/include/libxml/xmlunicode.h +202 -0
  108. package/vendor/libxml/include/libxml/xmlversion.h +484 -0
  109. package/vendor/libxml/include/libxml/xmlwin32version.h +239 -0
  110. package/vendor/libxml/include/libxml/xmlwriter.h +488 -0
  111. package/vendor/libxml/include/libxml/xpath.h +564 -0
  112. package/vendor/libxml/include/libxml/xpathInternals.h +632 -0
  113. package/vendor/libxml/include/libxml/xpointer.h +114 -0
  114. package/vendor/libxml/include/win32config.h +122 -0
  115. package/vendor/libxml/include/wsockcompat.h +54 -0
  116. package/vendor/libxml/legacy.c +1343 -0
  117. package/vendor/libxml/libxml.h +134 -0
  118. package/vendor/libxml/list.c +779 -0
  119. package/vendor/libxml/nanoftp.c +2118 -0
  120. package/vendor/libxml/nanohttp.c +1899 -0
  121. package/vendor/libxml/parser.c +15553 -0
  122. package/vendor/libxml/parserInternals.c +2164 -0
  123. package/vendor/libxml/pattern.c +2621 -0
  124. package/vendor/libxml/relaxng.c +11101 -0
  125. package/vendor/libxml/rngparser.c +1595 -0
  126. package/vendor/libxml/runsuite.c +1157 -0
  127. package/vendor/libxml/save.h +36 -0
  128. package/vendor/libxml/schematron.c +1787 -0
  129. package/vendor/libxml/threads.c +1049 -0
  130. package/vendor/libxml/timsort.h +601 -0
  131. package/vendor/libxml/tree.c +10183 -0
  132. package/vendor/libxml/trio.c +6895 -0
  133. package/vendor/libxml/trio.h +230 -0
  134. package/vendor/libxml/triodef.h +228 -0
  135. package/vendor/libxml/trionan.c +914 -0
  136. package/vendor/libxml/trionan.h +84 -0
  137. package/vendor/libxml/triop.h +150 -0
  138. package/vendor/libxml/triostr.c +2112 -0
  139. package/vendor/libxml/triostr.h +144 -0
  140. package/vendor/libxml/uri.c +2561 -0
  141. package/vendor/libxml/valid.c +7138 -0
  142. package/vendor/libxml/xinclude.c +2657 -0
  143. package/vendor/libxml/xlink.c +183 -0
  144. package/vendor/libxml/xmlIO.c +4135 -0
  145. package/vendor/libxml/xmlcatalog.c +624 -0
  146. package/vendor/libxml/xmllint.c +3796 -0
  147. package/vendor/libxml/xmlmemory.c +1163 -0
  148. package/vendor/libxml/xmlmodule.c +468 -0
  149. package/vendor/libxml/xmlreader.c +6033 -0
  150. package/vendor/libxml/xmlregexp.c +8271 -0
  151. package/vendor/libxml/xmlsave.c +2735 -0
  152. package/vendor/libxml/xmlschemas.c +29173 -0
  153. package/vendor/libxml/xmlschemastypes.c +6276 -0
  154. package/vendor/libxml/xmlstring.c +1050 -0
  155. package/vendor/libxml/xmlunicode.c +3179 -0
  156. package/vendor/libxml/xmlwriter.c +4738 -0
  157. package/vendor/libxml/xpath.c +14734 -0
  158. package/vendor/libxml/xpointer.c +2969 -0
  159. package/vendor/libxml/xzlib.c +815 -0
  160. package/vendor/libxml/xzlib.h +19 -0
@@ -0,0 +1,2561 @@
1
+ /**
2
+ * uri.c: set of generic URI related routines
3
+ *
4
+ * Reference: RFCs 3986, 2732 and 2373
5
+ *
6
+ * See Copyright for the status of this software.
7
+ *
8
+ * daniel@veillard.com
9
+ */
10
+
11
+ #define IN_LIBXML
12
+ #include "libxml.h"
13
+
14
+ #include <limits.h>
15
+ #include <string.h>
16
+
17
+ #include <libxml/xmlmemory.h>
18
+ #include <libxml/uri.h>
19
+ #include <libxml/globals.h>
20
+ #include <libxml/xmlerror.h>
21
+
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single operand
 * in any expression (e.g. "len % MAX_URI_LENGTH" or "MAX_URI_LENGTH / 2").
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
+
36
+ static void
37
+ xmlURIErrMemory(const char *extra)
38
+ {
39
+ if (extra)
40
+ __xmlRaiseError(NULL, NULL, NULL,
41
+ NULL, NULL, XML_FROM_URI,
42
+ XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43
+ extra, NULL, NULL, 0, 0,
44
+ "Memory allocation failed : %s\n", extra);
45
+ else
46
+ __xmlRaiseError(NULL, NULL, NULL,
47
+ NULL, NULL, XML_FROM_URI,
48
+ XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49
+ NULL, NULL, NULL, 0, 0,
50
+ "Memory allocation failed\n");
51
+ }
52
+
53
+ static void xmlCleanURI(xmlURIPtr uri);
54
+
/*
 * Character classes from the old RFC 2396 grammar, used in legacy handling
 * code. Every IS_xxx(x) macro below takes a character value, except
 * IS_UNWISE(p) which takes a pointer to the character to test.
 * Arguments are evaluated more than once: never pass an expression with
 * side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy classes this one takes a POINTER to the
 * character. The square brackets are part of the set tested here.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||	\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by 3 (the '%' and its two following characters), anything else
 * by 1. The macro does not check that two characters follow the '%'; the
 * caller must have validated the escape before using it.
 * The argument is fully parenthesized so that lvalue expressions such as
 * NEXT(*pp) advance the pointed-to pointer rather than pp itself.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * result back as a plain char pointer.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
+
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * Every ISA_xxx(p) macro takes a POINTER to the character to classify and
 * evaluates it several times: only pass side-effect free expressions.
 */

/* DIGIT */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/* ALPHA */
#define ISA_ALPHA(p)							\
      ((('a' <= *(p)) && (*(p) <= 'z')) ||				\
       (('A' <= *(p)) && (*(p) <= 'Z')))

/* HEXDIG, both letter cases accepted */
#define ISA_HEXDIG(p)							\
      (ISA_DIGIT(p) ||							\
       (('a' <= *(p)) && (*(p) <= 'f')) ||				\
       (('A' <= *(p)) && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') ||		\
       (*(p) == '\'') || (*(p) == '(') || (*(p) == ')') ||		\
       (*(p) == '*') || (*(p) == '+') || (*(p) == ',') ||		\
       (*(p) == ';') || (*(p) == '='))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||		\
       (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||		\
       (*(p) == '@'))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || ISA_SUB_DELIM(p))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      (ISA_ALPHA(p) || ISA_DIGIT(p) ||					\
       (*(p) == '-') || (*(p) == '.') || (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only looked at once the '%' matched.
 */
#define ISA_PCT_ENCODED(p)						\
      ((*(p) == '%') && ISA_HEXDIG((p) + 1) && ISA_HEXDIG((p) + 2))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
       (*(p) == ':') || (*(p) == '@'))
196
+
197
+ /**
198
+ * xmlParse3986Scheme:
199
+ * @uri: pointer to an URI structure
200
+ * @str: pointer to the string to analyze
201
+ *
202
+ * Parse an URI scheme
203
+ *
204
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205
+ *
206
+ * Returns 0 or the error code
207
+ */
208
+ static int
209
+ xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210
+ const char *cur;
211
+
212
+ if (str == NULL)
213
+ return(-1);
214
+
215
+ cur = *str;
216
+ if (!ISA_ALPHA(cur))
217
+ return(2);
218
+ cur++;
219
+ while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220
+ (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221
+ if (uri != NULL) {
222
+ if (uri->scheme != NULL) xmlFree(uri->scheme);
223
+ uri->scheme = STRNDUP(*str, cur - *str);
224
+ }
225
+ *str = cur;
226
+ return(0);
227
+ }
228
+
229
+ /**
230
+ * xmlParse3986Fragment:
231
+ * @uri: pointer to an URI structure
232
+ * @str: pointer to the string to analyze
233
+ *
234
+ * Parse the query part of an URI
235
+ *
236
+ * fragment = *( pchar / "/" / "?" )
237
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238
+ * in the fragment identifier but this is used very broadly for
239
+ * xpointer scheme selection, so we are allowing it here to not break
240
+ * for example all the DocBook processing chains.
241
+ *
242
+ * Returns 0 or the error code
243
+ */
244
+ static int
245
+ xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246
+ {
247
+ const char *cur;
248
+
249
+ if (str == NULL)
250
+ return (-1);
251
+
252
+ cur = *str;
253
+
254
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255
+ (*cur == '[') || (*cur == ']') ||
256
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257
+ NEXT(cur);
258
+ if (uri != NULL) {
259
+ if (uri->fragment != NULL)
260
+ xmlFree(uri->fragment);
261
+ if (uri->cleanup & 2)
262
+ uri->fragment = STRNDUP(*str, cur - *str);
263
+ else
264
+ uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265
+ }
266
+ *str = cur;
267
+ return (0);
268
+ }
269
+
270
+ /**
271
+ * xmlParse3986Query:
272
+ * @uri: pointer to an URI structure
273
+ * @str: pointer to the string to analyze
274
+ *
275
+ * Parse the query part of an URI
276
+ *
277
+ * query = *uric
278
+ *
279
+ * Returns 0 or the error code
280
+ */
281
+ static int
282
+ xmlParse3986Query(xmlURIPtr uri, const char **str)
283
+ {
284
+ const char *cur;
285
+
286
+ if (str == NULL)
287
+ return (-1);
288
+
289
+ cur = *str;
290
+
291
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293
+ NEXT(cur);
294
+ if (uri != NULL) {
295
+ if (uri->query != NULL)
296
+ xmlFree(uri->query);
297
+ if (uri->cleanup & 2)
298
+ uri->query = STRNDUP(*str, cur - *str);
299
+ else
300
+ uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
+
302
+ /* Save the raw bytes of the query as well.
303
+ * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304
+ */
305
+ if (uri->query_raw != NULL)
306
+ xmlFree (uri->query_raw);
307
+ uri->query_raw = STRNDUP (*str, cur - *str);
308
+ }
309
+ *str = cur;
310
+ return (0);
311
+ }
312
+
313
+ /**
314
+ * xmlParse3986Port:
315
+ * @uri: pointer to an URI structure
316
+ * @str: the string to analyze
317
+ *
318
+ * Parse a port part and fills in the appropriate fields
319
+ * of the @uri structure
320
+ *
321
+ * port = *DIGIT
322
+ *
323
+ * Returns 0 or the error code
324
+ */
325
+ static int
326
+ xmlParse3986Port(xmlURIPtr uri, const char **str)
327
+ {
328
+ const char *cur = *str;
329
+ int port = 0;
330
+
331
+ if (ISA_DIGIT(cur)) {
332
+ while (ISA_DIGIT(cur)) {
333
+ int digit = *cur - '0';
334
+
335
+ if (port > INT_MAX / 10)
336
+ return(1);
337
+ port *= 10;
338
+ if (port > INT_MAX - digit)
339
+ return(1);
340
+ port += digit;
341
+
342
+ cur++;
343
+ }
344
+ if (uri != NULL)
345
+ uri->port = port;
346
+ *str = cur;
347
+ return(0);
348
+ }
349
+ return(1);
350
+ }
351
+
352
+ /**
353
+ * xmlParse3986Userinfo:
354
+ * @uri: pointer to an URI structure
355
+ * @str: the string to analyze
356
+ *
357
+ * Parse an user information part and fills in the appropriate fields
358
+ * of the @uri structure
359
+ *
360
+ * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
361
+ *
362
+ * Returns 0 or the error code
363
+ */
364
+ static int
365
+ xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366
+ {
367
+ const char *cur;
368
+
369
+ cur = *str;
370
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
371
+ ISA_SUB_DELIM(cur) || (*cur == ':'))
372
+ NEXT(cur);
373
+ if (*cur == '@') {
374
+ if (uri != NULL) {
375
+ if (uri->user != NULL) xmlFree(uri->user);
376
+ if (uri->cleanup & 2)
377
+ uri->user = STRNDUP(*str, cur - *str);
378
+ else
379
+ uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380
+ }
381
+ *str = cur;
382
+ return(0);
383
+ }
384
+ return(1);
385
+ }
386
+
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * At most three digits are consumed; a fourth digit, if any, is left for
 * the caller. A multi-digit number starting with '0' and any value above
 * 255 are rejected (the previous code let 256-259 through because the
 * upper bound of the last digit was tested on the wrong character).
 * On failure @str is left untouched.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int value;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    value = cur[0] - '0';
    len = 1;
    /*
     * cur[len] is only read once cur[len - 1] proved to be a digit, so the
     * scan can never step over the terminating 0.
     */
    while ((len < 3) && (cur[len] >= '0') && (cur[len] <= '9')) {
        value = value * 10 + (cur[len] - '0');
        len++;
    }

    /* the grammar has no production with a leading zero */
    if ((len > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + len;
    return(0);
}
424
+ /**
425
+ * xmlParse3986Host:
426
+ * @uri: pointer to an URI structure
427
+ * @str: the string to analyze
428
+ *
429
+ * Parse an host part and fills in the appropriate fields
430
+ * of the @uri structure
431
+ *
432
+ * host = IP-literal / IPv4address / reg-name
433
+ * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
434
+ * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
435
+ * reg-name = *( unreserved / pct-encoded / sub-delims )
436
+ *
437
+ * Returns 0 or the error code
438
+ */
439
+ static int
440
+ xmlParse3986Host(xmlURIPtr uri, const char **str)
441
+ {
442
+ const char *cur = *str;
443
+ const char *host;
444
+
445
+ host = cur;
446
+ /*
447
+ * IPv6 and future addressing scheme are enclosed between brackets
448
+ */
449
+ if (*cur == '[') {
450
+ cur++;
451
+ while ((*cur != ']') && (*cur != 0))
452
+ cur++;
453
+ if (*cur != ']')
454
+ return(1);
455
+ cur++;
456
+ goto found;
457
+ }
458
+ /*
459
+ * try to parse an IPv4
460
+ */
461
+ if (ISA_DIGIT(cur)) {
462
+ if (xmlParse3986DecOctet(&cur) != 0)
463
+ goto not_ipv4;
464
+ if (*cur != '.')
465
+ goto not_ipv4;
466
+ cur++;
467
+ if (xmlParse3986DecOctet(&cur) != 0)
468
+ goto not_ipv4;
469
+ if (*cur != '.')
470
+ goto not_ipv4;
471
+ if (xmlParse3986DecOctet(&cur) != 0)
472
+ goto not_ipv4;
473
+ if (*cur != '.')
474
+ goto not_ipv4;
475
+ if (xmlParse3986DecOctet(&cur) != 0)
476
+ goto not_ipv4;
477
+ goto found;
478
+ not_ipv4:
479
+ cur = *str;
480
+ }
481
+ /*
482
+ * then this should be a hostname which can be empty
483
+ */
484
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
485
+ NEXT(cur);
486
+ found:
487
+ if (uri != NULL) {
488
+ if (uri->authority != NULL) xmlFree(uri->authority);
489
+ uri->authority = NULL;
490
+ if (uri->server != NULL) xmlFree(uri->server);
491
+ if (cur != host) {
492
+ if (uri->cleanup & 2)
493
+ uri->server = STRNDUP(host, cur - host);
494
+ else
495
+ uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496
+ } else
497
+ uri->server = NULL;
498
+ }
499
+ *str = cur;
500
+ return(0);
501
+ }
502
+
503
+ /**
504
+ * xmlParse3986Authority:
505
+ * @uri: pointer to an URI structure
506
+ * @str: the string to analyze
507
+ *
508
+ * Parse an authority part and fills in the appropriate fields
509
+ * of the @uri structure
510
+ *
511
+ * authority = [ userinfo "@" ] host [ ":" port ]
512
+ *
513
+ * Returns 0 or the error code
514
+ */
515
+ static int
516
+ xmlParse3986Authority(xmlURIPtr uri, const char **str)
517
+ {
518
+ const char *cur;
519
+ int ret;
520
+
521
+ cur = *str;
522
+ /*
523
+ * try to parse an userinfo and check for the trailing @
524
+ */
525
+ ret = xmlParse3986Userinfo(uri, &cur);
526
+ if ((ret != 0) || (*cur != '@'))
527
+ cur = *str;
528
+ else
529
+ cur++;
530
+ ret = xmlParse3986Host(uri, &cur);
531
+ if (ret != 0) return(ret);
532
+ if (*cur == ':') {
533
+ cur++;
534
+ ret = xmlParse3986Port(uri, &cur);
535
+ if (ret != 0) return(ret);
536
+ }
537
+ *str = cur;
538
+ return(0);
539
+ }
540
+
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment. Nothing is stored, only @str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character which is not a pchar or which is
 * equal to @forbid.
 *
 * Returns 0 on success, 1 if the first character is not a pchar while
 *         @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(end) && (*end != forbid))
        NEXT(end);

    *str = end;
    return (0);
}
573
+
574
+ /**
575
+ * xmlParse3986PathAbEmpty:
576
+ * @uri: pointer to an URI structure
577
+ * @str: the string to analyze
578
+ *
579
+ * Parse an path absolute or empty and fills in the appropriate fields
580
+ * of the @uri structure
581
+ *
582
+ * path-abempty = *( "/" segment )
583
+ *
584
+ * Returns 0 or the error code
585
+ */
586
+ static int
587
+ xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588
+ {
589
+ const char *cur;
590
+ int ret;
591
+
592
+ cur = *str;
593
+
594
+ while (*cur == '/') {
595
+ cur++;
596
+ ret = xmlParse3986Segment(&cur, 0, 1);
597
+ if (ret != 0) return(ret);
598
+ }
599
+ if (uri != NULL) {
600
+ if (uri->path != NULL) xmlFree(uri->path);
601
+ if (*str != cur) {
602
+ if (uri->cleanup & 2)
603
+ uri->path = STRNDUP(*str, cur - *str);
604
+ else
605
+ uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
606
+ } else {
607
+ uri->path = NULL;
608
+ }
609
+ }
610
+ *str = cur;
611
+ return (0);
612
+ }
613
+
614
+ /**
615
+ * xmlParse3986PathAbsolute:
616
+ * @uri: pointer to an URI structure
617
+ * @str: the string to analyze
618
+ *
619
+ * Parse an path absolute and fills in the appropriate fields
620
+ * of the @uri structure
621
+ *
622
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623
+ *
624
+ * Returns 0 or the error code
625
+ */
626
+ static int
627
+ xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628
+ {
629
+ const char *cur;
630
+ int ret;
631
+
632
+ cur = *str;
633
+
634
+ if (*cur != '/')
635
+ return(1);
636
+ cur++;
637
+ ret = xmlParse3986Segment(&cur, 0, 0);
638
+ if (ret == 0) {
639
+ while (*cur == '/') {
640
+ cur++;
641
+ ret = xmlParse3986Segment(&cur, 0, 1);
642
+ if (ret != 0) return(ret);
643
+ }
644
+ }
645
+ if (uri != NULL) {
646
+ if (uri->path != NULL) xmlFree(uri->path);
647
+ if (cur != *str) {
648
+ if (uri->cleanup & 2)
649
+ uri->path = STRNDUP(*str, cur - *str);
650
+ else
651
+ uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
652
+ } else {
653
+ uri->path = NULL;
654
+ }
655
+ }
656
+ *str = cur;
657
+ return (0);
658
+ }
659
+
660
+ /**
661
+ * xmlParse3986PathRootless:
662
+ * @uri: pointer to an URI structure
663
+ * @str: the string to analyze
664
+ *
665
+ * Parse an path without root and fills in the appropriate fields
666
+ * of the @uri structure
667
+ *
668
+ * path-rootless = segment-nz *( "/" segment )
669
+ *
670
+ * Returns 0 or the error code
671
+ */
672
+ static int
673
+ xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674
+ {
675
+ const char *cur;
676
+ int ret;
677
+
678
+ cur = *str;
679
+
680
+ ret = xmlParse3986Segment(&cur, 0, 0);
681
+ if (ret != 0) return(ret);
682
+ while (*cur == '/') {
683
+ cur++;
684
+ ret = xmlParse3986Segment(&cur, 0, 1);
685
+ if (ret != 0) return(ret);
686
+ }
687
+ if (uri != NULL) {
688
+ if (uri->path != NULL) xmlFree(uri->path);
689
+ if (cur != *str) {
690
+ if (uri->cleanup & 2)
691
+ uri->path = STRNDUP(*str, cur - *str);
692
+ else
693
+ uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
694
+ } else {
695
+ uri->path = NULL;
696
+ }
697
+ }
698
+ *str = cur;
699
+ return (0);
700
+ }
701
+
702
+ /**
703
+ * xmlParse3986PathNoScheme:
704
+ * @uri: pointer to an URI structure
705
+ * @str: the string to analyze
706
+ *
707
+ * Parse an path which is not a scheme and fills in the appropriate fields
708
+ * of the @uri structure
709
+ *
710
+ * path-noscheme = segment-nz-nc *( "/" segment )
711
+ *
712
+ * Returns 0 or the error code
713
+ */
714
+ static int
715
+ xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716
+ {
717
+ const char *cur;
718
+ int ret;
719
+
720
+ cur = *str;
721
+
722
+ ret = xmlParse3986Segment(&cur, ':', 0);
723
+ if (ret != 0) return(ret);
724
+ while (*cur == '/') {
725
+ cur++;
726
+ ret = xmlParse3986Segment(&cur, 0, 1);
727
+ if (ret != 0) return(ret);
728
+ }
729
+ if (uri != NULL) {
730
+ if (uri->path != NULL) xmlFree(uri->path);
731
+ if (cur != *str) {
732
+ if (uri->cleanup & 2)
733
+ uri->path = STRNDUP(*str, cur - *str);
734
+ else
735
+ uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
736
+ } else {
737
+ uri->path = NULL;
738
+ }
739
+ }
740
+ *str = cur;
741
+ return (0);
742
+ }
743
+
744
+ /**
745
+ * xmlParse3986HierPart:
746
+ * @uri: pointer to an URI structure
747
+ * @str: the string to analyze
748
+ *
749
+ * Parse an hierarchical part and fills in the appropriate fields
750
+ * of the @uri structure
751
+ *
752
+ * hier-part = "//" authority path-abempty
753
+ * / path-absolute
754
+ * / path-rootless
755
+ * / path-empty
756
+ *
757
+ * Returns 0 or the error code
758
+ */
759
+ static int
760
+ xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761
+ {
762
+ const char *cur;
763
+ int ret;
764
+
765
+ cur = *str;
766
+
767
+ if ((*cur == '/') && (*(cur + 1) == '/')) {
768
+ cur += 2;
769
+ ret = xmlParse3986Authority(uri, &cur);
770
+ if (ret != 0) return(ret);
771
+ if (uri->server == NULL)
772
+ uri->port = -1;
773
+ ret = xmlParse3986PathAbEmpty(uri, &cur);
774
+ if (ret != 0) return(ret);
775
+ *str = cur;
776
+ return(0);
777
+ } else if (*cur == '/') {
778
+ ret = xmlParse3986PathAbsolute(uri, &cur);
779
+ if (ret != 0) return(ret);
780
+ } else if (ISA_PCHAR(cur)) {
781
+ ret = xmlParse3986PathRootless(uri, &cur);
782
+ if (ret != 0) return(ret);
783
+ } else {
784
+ /* path-empty is effectively empty */
785
+ if (uri != NULL) {
786
+ if (uri->path != NULL) xmlFree(uri->path);
787
+ uri->path = NULL;
788
+ }
789
+ }
790
+ *str = cur;
791
+ return (0);
792
+ }
793
+
794
+ /**
795
+ * xmlParse3986RelativeRef:
796
+ * @uri: pointer to an URI structure
797
+ * @str: the string to analyze
798
+ *
799
+ * Parse an URI string and fills in the appropriate fields
800
+ * of the @uri structure
801
+ *
802
+ * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
803
+ * relative-part = "//" authority path-abempty
804
+ * / path-absolute
805
+ * / path-noscheme
806
+ * / path-empty
807
+ *
808
+ * Returns 0 or the error code
809
+ */
810
+ static int
811
+ xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812
+ int ret;
813
+
814
+ if ((*str == '/') && (*(str + 1) == '/')) {
815
+ str += 2;
816
+ ret = xmlParse3986Authority(uri, &str);
817
+ if (ret != 0) return(ret);
818
+ ret = xmlParse3986PathAbEmpty(uri, &str);
819
+ if (ret != 0) return(ret);
820
+ } else if (*str == '/') {
821
+ ret = xmlParse3986PathAbsolute(uri, &str);
822
+ if (ret != 0) return(ret);
823
+ } else if (ISA_PCHAR(str)) {
824
+ ret = xmlParse3986PathNoScheme(uri, &str);
825
+ if (ret != 0) return(ret);
826
+ } else {
827
+ /* path-empty is effectively empty */
828
+ if (uri != NULL) {
829
+ if (uri->path != NULL) xmlFree(uri->path);
830
+ uri->path = NULL;
831
+ }
832
+ }
833
+
834
+ if (*str == '?') {
835
+ str++;
836
+ ret = xmlParse3986Query(uri, &str);
837
+ if (ret != 0) return(ret);
838
+ }
839
+ if (*str == '#') {
840
+ str++;
841
+ ret = xmlParse3986Fragment(uri, &str);
842
+ if (ret != 0) return(ret);
843
+ }
844
+ if (*str != 0) {
845
+ xmlCleanURI(uri);
846
+ return(1);
847
+ }
848
+ return(0);
849
+ }
850
+
851
+
852
+ /**
853
+ * xmlParse3986URI:
854
+ * @uri: pointer to an URI structure
855
+ * @str: the string to analyze
856
+ *
857
+ * Parse an URI string and fills in the appropriate fields
858
+ * of the @uri structure
859
+ *
860
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861
+ *
862
+ * Returns 0 or the error code
863
+ */
864
+ static int
865
+ xmlParse3986URI(xmlURIPtr uri, const char *str) {
866
+ int ret;
867
+
868
+ ret = xmlParse3986Scheme(uri, &str);
869
+ if (ret != 0) return(ret);
870
+ if (*str != ':') {
871
+ return(1);
872
+ }
873
+ str++;
874
+ ret = xmlParse3986HierPart(uri, &str);
875
+ if (ret != 0) return(ret);
876
+ if (*str == '?') {
877
+ str++;
878
+ ret = xmlParse3986Query(uri, &str);
879
+ if (ret != 0) return(ret);
880
+ }
881
+ if (*str == '#') {
882
+ str++;
883
+ ret = xmlParse3986Fragment(uri, &str);
884
+ if (ret != 0) return(ret);
885
+ }
886
+ if (*str != 0) {
887
+ xmlCleanURI(uri);
888
+ return(1);
889
+ }
890
+ return(0);
891
+ }
892
+
893
+ /**
894
+ * xmlParse3986URIReference:
895
+ * @uri: pointer to an URI structure
896
+ * @str: the string to analyze
897
+ *
898
+ * Parse an URI reference string and fills in the appropriate fields
899
+ * of the @uri structure
900
+ *
901
+ * URI-reference = URI / relative-ref
902
+ *
903
+ * Returns 0 or the error code
904
+ */
905
+ static int
906
+ xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907
+ int ret;
908
+
909
+ if (str == NULL)
910
+ return(-1);
911
+ xmlCleanURI(uri);
912
+
913
+ /*
914
+ * Try first to parse absolute refs, then fallback to relative if
915
+ * it fails.
916
+ */
917
+ ret = xmlParse3986URI(uri, str);
918
+ if (ret != 0) {
919
+ xmlCleanURI(uri);
920
+ ret = xmlParse3986RelativeRef(uri, str);
921
+ if (ret != 0) {
922
+ xmlCleanURI(uri);
923
+ return(ret);
924
+ }
925
+ }
926
+ return(0);
927
+ }
928
+
929
+ /**
930
+ * xmlParseURI:
931
+ * @str: the URI string to analyze
932
+ *
933
+ * Parse an URI based on RFC 3986
934
+ *
935
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
936
+ *
937
+ * Returns a newly built xmlURIPtr or NULL in case of error
938
+ */
939
+ xmlURIPtr
940
+ xmlParseURI(const char *str) {
941
+ xmlURIPtr uri;
942
+ int ret;
943
+
944
+ if (str == NULL)
945
+ return(NULL);
946
+ uri = xmlCreateURI();
947
+ if (uri != NULL) {
948
+ ret = xmlParse3986URIReference(uri, str);
949
+ if (ret) {
950
+ xmlFreeURI(uri);
951
+ return(NULL);
952
+ }
953
+ }
954
+ return(uri);
955
+ }
956
+
957
+ /**
958
+ * xmlParseURIReference:
959
+ * @uri: pointer to an URI structure
960
+ * @str: the string to analyze
961
+ *
962
+ * Parse an URI reference string based on RFC 3986 and fills in the
963
+ * appropriate fields of the @uri structure
964
+ *
965
+ * URI-reference = URI / relative-ref
966
+ *
967
+ * Returns 0 or the error code
968
+ */
969
+ int
970
+ xmlParseURIReference(xmlURIPtr uri, const char *str) {
971
+ return(xmlParse3986URIReference(uri, str));
972
+ }
973
+
974
+ /**
975
+ * xmlParseURIRaw:
976
+ * @str: the URI string to analyze
977
+ * @raw: if 1 unescaping of URI pieces are disabled
978
+ *
979
+ * Parse an URI but allows to keep intact the original fragments.
980
+ *
981
+ * URI-reference = URI / relative-ref
982
+ *
983
+ * Returns a newly built xmlURIPtr or NULL in case of error
984
+ */
985
+ xmlURIPtr
986
+ xmlParseURIRaw(const char *str, int raw) {
987
+ xmlURIPtr uri;
988
+ int ret;
989
+
990
+ if (str == NULL)
991
+ return(NULL);
992
+ uri = xmlCreateURI();
993
+ if (uri != NULL) {
994
+ if (raw) {
995
+ uri->cleanup |= 2;
996
+ }
997
+ ret = xmlParseURIReference(uri, str);
998
+ if (ret) {
999
+ xmlFreeURI(uri);
1000
+ return(NULL);
1001
+ }
1002
+ }
1003
+ return(uri);
1004
+ }
1005
+
1006
+ /************************************************************************
1007
+ * *
1008
+ * Generic URI structure functions *
1009
+ * *
1010
+ ************************************************************************/
1011
+
1012
+ /**
1013
+ * xmlCreateURI:
1014
+ *
1015
+ * Simply creates an empty xmlURI
1016
+ *
1017
+ * Returns the new structure or NULL in case of error
1018
+ */
1019
+ xmlURIPtr
1020
+ xmlCreateURI(void) {
1021
+ xmlURIPtr ret;
1022
+
1023
+ ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024
+ if (ret == NULL) {
1025
+ xmlURIErrMemory("creating URI structure\n");
1026
+ return(NULL);
1027
+ }
1028
+ memset(ret, 0, sizeof(xmlURI));
1029
+ return(ret);
1030
+ }
1031
+
1032
+ /**
1033
+ * xmlSaveUriRealloc:
1034
+ *
1035
+ * Function to handle properly a reallocation when saving an URI
1036
+ * Also imposes some limit on the length of an URI string output
1037
+ */
1038
+ static xmlChar *
1039
+ xmlSaveUriRealloc(xmlChar *ret, int *max) {
1040
+ xmlChar *temp;
1041
+ int tmp;
1042
+
1043
+ if (*max > MAX_URI_LENGTH) {
1044
+ xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045
+ return(NULL);
1046
+ }
1047
+ tmp = *max * 2;
1048
+ temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049
+ if (temp == NULL) {
1050
+ xmlURIErrMemory("saving URI\n");
1051
+ return(NULL);
1052
+ }
1053
+ *max = tmp;
1054
+ return(temp);
1055
+ }
1056
+
1057
+ /**
1058
+ * xmlSaveUri:
1059
+ * @uri: pointer to an xmlURI
1060
+ *
1061
+ * Save the URI as an escaped string
1062
+ *
1063
+ * Returns a new string (to be deallocated by caller)
1064
+ */
1065
+ xmlChar *
1066
+ xmlSaveUri(xmlURIPtr uri) {
1067
+ xmlChar *ret = NULL;
1068
+ xmlChar *temp;
1069
+ const char *p;
1070
+ int len;
1071
+ int max;
1072
+
1073
+ if (uri == NULL) return(NULL);
1074
+
1075
+
1076
+ max = 80;
1077
+ ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1078
+ if (ret == NULL) {
1079
+ xmlURIErrMemory("saving URI\n");
1080
+ return(NULL);
1081
+ }
1082
+ len = 0;
1083
+
1084
+ if (uri->scheme != NULL) {
1085
+ p = uri->scheme;
1086
+ while (*p != 0) {
1087
+ if (len >= max) {
1088
+ temp = xmlSaveUriRealloc(ret, &max);
1089
+ if (temp == NULL) goto mem_error;
1090
+ ret = temp;
1091
+ }
1092
+ ret[len++] = *p++;
1093
+ }
1094
+ if (len >= max) {
1095
+ temp = xmlSaveUriRealloc(ret, &max);
1096
+ if (temp == NULL) goto mem_error;
1097
+ ret = temp;
1098
+ }
1099
+ ret[len++] = ':';
1100
+ }
1101
+ if (uri->opaque != NULL) {
1102
+ p = uri->opaque;
1103
+ while (*p != 0) {
1104
+ if (len + 3 >= max) {
1105
+ temp = xmlSaveUriRealloc(ret, &max);
1106
+ if (temp == NULL) goto mem_error;
1107
+ ret = temp;
1108
+ }
1109
+ if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1110
+ ret[len++] = *p++;
1111
+ else {
1112
+ int val = *(unsigned char *)p++;
1113
+ int hi = val / 0x10, lo = val % 0x10;
1114
+ ret[len++] = '%';
1115
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117
+ }
1118
+ }
1119
+ } else {
1120
+ if ((uri->server != NULL) || (uri->port == -1)) {
1121
+ if (len + 3 >= max) {
1122
+ temp = xmlSaveUriRealloc(ret, &max);
1123
+ if (temp == NULL) goto mem_error;
1124
+ ret = temp;
1125
+ }
1126
+ ret[len++] = '/';
1127
+ ret[len++] = '/';
1128
+ if (uri->user != NULL) {
1129
+ p = uri->user;
1130
+ while (*p != 0) {
1131
+ if (len + 3 >= max) {
1132
+ temp = xmlSaveUriRealloc(ret, &max);
1133
+ if (temp == NULL) goto mem_error;
1134
+ ret = temp;
1135
+ }
1136
+ if ((IS_UNRESERVED(*(p))) ||
1137
+ ((*(p) == ';')) || ((*(p) == ':')) ||
1138
+ ((*(p) == '&')) || ((*(p) == '=')) ||
1139
+ ((*(p) == '+')) || ((*(p) == '$')) ||
1140
+ ((*(p) == ',')))
1141
+ ret[len++] = *p++;
1142
+ else {
1143
+ int val = *(unsigned char *)p++;
1144
+ int hi = val / 0x10, lo = val % 0x10;
1145
+ ret[len++] = '%';
1146
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148
+ }
1149
+ }
1150
+ if (len + 3 >= max) {
1151
+ temp = xmlSaveUriRealloc(ret, &max);
1152
+ if (temp == NULL) goto mem_error;
1153
+ ret = temp;
1154
+ }
1155
+ ret[len++] = '@';
1156
+ }
1157
+ if (uri->server != NULL) {
1158
+ p = uri->server;
1159
+ while (*p != 0) {
1160
+ if (len >= max) {
1161
+ temp = xmlSaveUriRealloc(ret, &max);
1162
+ if (temp == NULL) goto mem_error;
1163
+ ret = temp;
1164
+ }
1165
+ ret[len++] = *p++;
1166
+ }
1167
+ if (uri->port > 0) {
1168
+ if (len + 10 >= max) {
1169
+ temp = xmlSaveUriRealloc(ret, &max);
1170
+ if (temp == NULL) goto mem_error;
1171
+ ret = temp;
1172
+ }
1173
+ len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1174
+ }
1175
+ }
1176
+ } else if (uri->authority != NULL) {
1177
+ if (len + 3 >= max) {
1178
+ temp = xmlSaveUriRealloc(ret, &max);
1179
+ if (temp == NULL) goto mem_error;
1180
+ ret = temp;
1181
+ }
1182
+ ret[len++] = '/';
1183
+ ret[len++] = '/';
1184
+ p = uri->authority;
1185
+ while (*p != 0) {
1186
+ if (len + 3 >= max) {
1187
+ temp = xmlSaveUriRealloc(ret, &max);
1188
+ if (temp == NULL) goto mem_error;
1189
+ ret = temp;
1190
+ }
1191
+ if ((IS_UNRESERVED(*(p))) ||
1192
+ ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1193
+ ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1194
+ ((*(p) == '=')) || ((*(p) == '+')))
1195
+ ret[len++] = *p++;
1196
+ else {
1197
+ int val = *(unsigned char *)p++;
1198
+ int hi = val / 0x10, lo = val % 0x10;
1199
+ ret[len++] = '%';
1200
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202
+ }
1203
+ }
1204
+ } else if (uri->scheme != NULL) {
1205
+ if (len + 3 >= max) {
1206
+ temp = xmlSaveUriRealloc(ret, &max);
1207
+ if (temp == NULL) goto mem_error;
1208
+ ret = temp;
1209
+ }
1210
+ }
1211
+ if (uri->path != NULL) {
1212
+ p = uri->path;
1213
+ /*
1214
+ * the colon in file:///d: should not be escaped or
1215
+ * Windows accesses fail later.
1216
+ */
1217
+ if ((uri->scheme != NULL) &&
1218
+ (p[0] == '/') &&
1219
+ (((p[1] >= 'a') && (p[1] <= 'z')) ||
1220
+ ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1221
+ (p[2] == ':') &&
1222
+ (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1223
+ if (len + 3 >= max) {
1224
+ temp = xmlSaveUriRealloc(ret, &max);
1225
+ if (temp == NULL) goto mem_error;
1226
+ ret = temp;
1227
+ }
1228
+ ret[len++] = *p++;
1229
+ ret[len++] = *p++;
1230
+ ret[len++] = *p++;
1231
+ }
1232
+ while (*p != 0) {
1233
+ if (len + 3 >= max) {
1234
+ temp = xmlSaveUriRealloc(ret, &max);
1235
+ if (temp == NULL) goto mem_error;
1236
+ ret = temp;
1237
+ }
1238
+ if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1239
+ ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1240
+ ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1241
+ ((*(p) == ',')))
1242
+ ret[len++] = *p++;
1243
+ else {
1244
+ int val = *(unsigned char *)p++;
1245
+ int hi = val / 0x10, lo = val % 0x10;
1246
+ ret[len++] = '%';
1247
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1248
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1249
+ }
1250
+ }
1251
+ }
1252
+ if (uri->query_raw != NULL) {
1253
+ if (len + 1 >= max) {
1254
+ temp = xmlSaveUriRealloc(ret, &max);
1255
+ if (temp == NULL) goto mem_error;
1256
+ ret = temp;
1257
+ }
1258
+ ret[len++] = '?';
1259
+ p = uri->query_raw;
1260
+ while (*p != 0) {
1261
+ if (len + 1 >= max) {
1262
+ temp = xmlSaveUriRealloc(ret, &max);
1263
+ if (temp == NULL) goto mem_error;
1264
+ ret = temp;
1265
+ }
1266
+ ret[len++] = *p++;
1267
+ }
1268
+ } else if (uri->query != NULL) {
1269
+ if (len + 3 >= max) {
1270
+ temp = xmlSaveUriRealloc(ret, &max);
1271
+ if (temp == NULL) goto mem_error;
1272
+ ret = temp;
1273
+ }
1274
+ ret[len++] = '?';
1275
+ p = uri->query;
1276
+ while (*p != 0) {
1277
+ if (len + 3 >= max) {
1278
+ temp = xmlSaveUriRealloc(ret, &max);
1279
+ if (temp == NULL) goto mem_error;
1280
+ ret = temp;
1281
+ }
1282
+ if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1283
+ ret[len++] = *p++;
1284
+ else {
1285
+ int val = *(unsigned char *)p++;
1286
+ int hi = val / 0x10, lo = val % 0x10;
1287
+ ret[len++] = '%';
1288
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1289
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1290
+ }
1291
+ }
1292
+ }
1293
+ }
1294
+ if (uri->fragment != NULL) {
1295
+ if (len + 3 >= max) {
1296
+ temp = xmlSaveUriRealloc(ret, &max);
1297
+ if (temp == NULL) goto mem_error;
1298
+ ret = temp;
1299
+ }
1300
+ ret[len++] = '#';
1301
+ p = uri->fragment;
1302
+ while (*p != 0) {
1303
+ if (len + 3 >= max) {
1304
+ temp = xmlSaveUriRealloc(ret, &max);
1305
+ if (temp == NULL) goto mem_error;
1306
+ ret = temp;
1307
+ }
1308
+ if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1309
+ ret[len++] = *p++;
1310
+ else {
1311
+ int val = *(unsigned char *)p++;
1312
+ int hi = val / 0x10, lo = val % 0x10;
1313
+ ret[len++] = '%';
1314
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1315
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1316
+ }
1317
+ }
1318
+ }
1319
+ if (len >= max) {
1320
+ temp = xmlSaveUriRealloc(ret, &max);
1321
+ if (temp == NULL) goto mem_error;
1322
+ ret = temp;
1323
+ }
1324
+ ret[len] = 0;
1325
+ return(ret);
1326
+
1327
+ mem_error:
1328
+ xmlFree(ret);
1329
+ return(NULL);
1330
+ }
1331
+
1332
+ /**
1333
+ * xmlPrintURI:
1334
+ * @stream: a FILE* for the output
1335
+ * @uri: pointer to an xmlURI
1336
+ *
1337
+ * Prints the URI in the stream @stream.
1338
+ */
1339
+ void
1340
+ xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1341
+ xmlChar *out;
1342
+
1343
+ out = xmlSaveUri(uri);
1344
+ if (out != NULL) {
1345
+ fprintf(stream, "%s", (char *) out);
1346
+ xmlFree(out);
1347
+ }
1348
+ }
1349
+
1350
+ /**
1351
+ * xmlCleanURI:
1352
+ * @uri: pointer to an xmlURI
1353
+ *
1354
+ * Make sure the xmlURI struct is free of content
1355
+ */
1356
+ static void
1357
+ xmlCleanURI(xmlURIPtr uri) {
1358
+ if (uri == NULL) return;
1359
+
1360
+ if (uri->scheme != NULL) xmlFree(uri->scheme);
1361
+ uri->scheme = NULL;
1362
+ if (uri->server != NULL) xmlFree(uri->server);
1363
+ uri->server = NULL;
1364
+ if (uri->user != NULL) xmlFree(uri->user);
1365
+ uri->user = NULL;
1366
+ if (uri->path != NULL) xmlFree(uri->path);
1367
+ uri->path = NULL;
1368
+ if (uri->fragment != NULL) xmlFree(uri->fragment);
1369
+ uri->fragment = NULL;
1370
+ if (uri->opaque != NULL) xmlFree(uri->opaque);
1371
+ uri->opaque = NULL;
1372
+ if (uri->authority != NULL) xmlFree(uri->authority);
1373
+ uri->authority = NULL;
1374
+ if (uri->query != NULL) xmlFree(uri->query);
1375
+ uri->query = NULL;
1376
+ if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1377
+ uri->query_raw = NULL;
1378
+ }
1379
+
1380
+ /**
1381
+ * xmlFreeURI:
1382
+ * @uri: pointer to an xmlURI
1383
+ *
1384
+ * Free up the xmlURI struct
1385
+ */
1386
+ void
1387
+ xmlFreeURI(xmlURIPtr uri) {
1388
+ if (uri == NULL) return;
1389
+
1390
+ if (uri->scheme != NULL) xmlFree(uri->scheme);
1391
+ if (uri->server != NULL) xmlFree(uri->server);
1392
+ if (uri->user != NULL) xmlFree(uri->user);
1393
+ if (uri->path != NULL) xmlFree(uri->path);
1394
+ if (uri->fragment != NULL) xmlFree(uri->fragment);
1395
+ if (uri->opaque != NULL) xmlFree(uri->opaque);
1396
+ if (uri->authority != NULL) xmlFree(uri->authority);
1397
+ if (uri->query != NULL) xmlFree(uri->query);
1398
+ if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1399
+ xmlFree(uri);
1400
+ }
1401
+
1402
+ /************************************************************************
1403
+ * *
1404
+ * Helper functions *
1405
+ * *
1406
+ ************************************************************************/
1407
+
1408
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the normalization steps of RFC 2396, Section 5.2, steps 6.c
 * through 6.g to @path, and collapses runs of "/" between segments.
 *
 * The string is rewritten in place; it never grows and no allocation
 * is done.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /* Leading slashes are kept untouched: start at the first segment. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    wr = rd;

    /*
     * Pass 1, steps (c) and (d): drop "." segments while copying the
     * remaining segments down over the removed text.
     */
    while (*rd != '\0') {
        if (*rd == '.') {
            /* (c) a "./" segment disappears, with any extra slashes */
            if (rd[1] == '/') {
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }
            /* (d) a final "." segment disappears */
            if (rd[1] == '\0')
                break;
        }

        /* Keep this segment. */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* Keep a single separator out of a run of slashes. */
        while (rd[1] == '/')
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Back to the first segment for the second pass. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Pass 2, steps (e) and (f): remove "<segment>/.." pairs where
     * <segment> is not itself "..".  The removal has to be iterative
     * (leftmost match first), so the string is compacted on every
     * match and the scan restarts from the preceding segment.
     */
    for (;;) {
        char *next, *dst, *src;
        int curIsDotDot, nextIsDotDot;

        /* "rd" is the first character of the segment under test. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment has no follower, so nothing can match. */
        if (*next == '\0')
            break;
        next++;

        curIsDotDot = (rd[0] == '.') && (rd[1] == '.') && (next == rd + 3);
        nextIsDotDot = (next[0] == '.') && (next[1] == '.') &&
                       ((next[2] == '/') || (next[2] == '\0'));
        if (curIsDotDot || !nextIsDotDot) {
            rd = next;
            continue;
        }

        /* (f) "<segment>/.." ends the string: cut it off and stop. */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /* (e) the regions overlap, so copy by hand, terminator included. */
        dst = rd;
        src = next + 3;
        do {
            *dst = *src++;
        } while (*dst++ != '\0');

        /* Step back over the separators preceding the removed text. */
        next = rd;
        while (next > path) {
            next--;
            if (*next != '/')
                break;
        }
        /* No previous segment: carry on from the current position. */
        if (next == path)
            continue;

        /*
         * "next" sits at the end of the previous segment; rewind to
         * its start so that "foo/bar/../.." also loses "foo/..".
         */
        rd = next;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }
    *wr = '\0';

    /*
     * (g) Leading "/.." segments that are still present in an absolute
     * path are discarded.
     */
    if (*path == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
               ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path) {
            wr = path;
            while (*rd != '\0')
                *wr++ = *rd++;
            *wr = '\0';
        }
    }

    return(0);
}
1596
+
1597
/* Returns 1 if c is one of 0-9, a-f or A-F, and 0 otherwise. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1604
+
1605
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Copies @str while replacing every "%XX" sequence, where both X are
 * hexadecimal digits, by the single byte it encodes.  Nothing checks
 * that the string is an URI, and no character encoding is applied to
 * the decoded bytes.  A '%' that is not followed by two hexadecimal
 * digits within the remaining length is copied as is.
 *
 * The result is never longer than the input.  When @target is NULL a
 * buffer of @len + 1 bytes is allocated; otherwise the output, zero
 * terminator included, is written to @target.
 *
 * Returns the unescaped string (@target if it was provided), or NULL
 *         only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Fold the two digits, most significant first. */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else /* 'A'..'F', guaranteed by is_hex() above */
                    value += (digit - 'A') + 10;
            }
            *dst++ = (char) value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1666
+
1667
+ /**
1668
+ * xmlURIEscapeStr:
1669
+ * @str: string to escape
1670
+ * @list: exception list string of chars not to escape
1671
+ *
1672
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
1673
+ * and the characters in the exception list.
1674
+ *
1675
+ * Returns a new escaped string or NULL in case of error.
1676
+ */
1677
+ xmlChar *
1678
+ xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1679
+ xmlChar *ret, ch;
1680
+ xmlChar *temp;
1681
+ const xmlChar *in;
1682
+ int len, out;
1683
+
1684
+ if (str == NULL)
1685
+ return(NULL);
1686
+ if (str[0] == 0)
1687
+ return(xmlStrdup(str));
1688
+ len = xmlStrlen(str);
1689
+ if (!(len > 0)) return(NULL);
1690
+
1691
+ len += 20;
1692
+ ret = (xmlChar *) xmlMallocAtomic(len);
1693
+ if (ret == NULL) {
1694
+ xmlURIErrMemory("escaping URI value\n");
1695
+ return(NULL);
1696
+ }
1697
+ in = (const xmlChar *) str;
1698
+ out = 0;
1699
+ while(*in != 0) {
1700
+ if (len - out <= 3) {
1701
+ temp = xmlSaveUriRealloc(ret, &len);
1702
+ if (temp == NULL) {
1703
+ xmlURIErrMemory("escaping URI value\n");
1704
+ xmlFree(ret);
1705
+ return(NULL);
1706
+ }
1707
+ ret = temp;
1708
+ }
1709
+
1710
+ ch = *in;
1711
+
1712
+ if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1713
+ unsigned char val;
1714
+ ret[out++] = '%';
1715
+ val = ch >> 4;
1716
+ if (val <= 9)
1717
+ ret[out++] = '0' + val;
1718
+ else
1719
+ ret[out++] = 'A' + val - 0xA;
1720
+ val = ch & 0xF;
1721
+ if (val <= 9)
1722
+ ret[out++] = '0' + val;
1723
+ else
1724
+ ret[out++] = 'A' + val - 0xA;
1725
+ in++;
1726
+ } else {
1727
+ ret[out++] = *in++;
1728
+ }
1729
+
1730
+ }
1731
+ ret[out] = 0;
1732
+ return(ret);
1733
+ }
1734
+
1735
+ /**
1736
+ * xmlURIEscape:
1737
+ * @str: the string of the URI to escape
1738
+ *
1739
+ * Escaping routine, does not do validity checks !
1740
+ * It will try to escape the chars needing this, but this is heuristic
1741
+ * based it's impossible to be sure.
1742
+ *
1743
+ * Returns an copy of the string, but escaped
1744
+ *
1745
+ * 25 May 2001
1746
+ * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1747
+ * according to RFC2396.
1748
+ * - Carl Douglas
1749
+ */
1750
+ xmlChar *
1751
+ xmlURIEscape(const xmlChar * str)
1752
+ {
1753
+ xmlChar *ret, *segment = NULL;
1754
+ xmlURIPtr uri;
1755
+ int ret2;
1756
+
1757
+ if (str == NULL)
1758
+ return (NULL);
1759
+
1760
+ uri = xmlCreateURI();
1761
+ if (uri != NULL) {
1762
+ /*
1763
+ * Allow escaping errors in the unescaped form
1764
+ */
1765
+ uri->cleanup = 1;
1766
+ ret2 = xmlParseURIReference(uri, (const char *)str);
1767
+ if (ret2) {
1768
+ xmlFreeURI(uri);
1769
+ return (NULL);
1770
+ }
1771
+ }
1772
+
1773
+ if (!uri)
1774
+ return NULL;
1775
+
1776
+ ret = NULL;
1777
+
1778
+ #define NULLCHK(p) if(!p) { \
1779
+ xmlURIErrMemory("escaping URI value\n"); \
1780
+ xmlFreeURI(uri); \
1781
+ xmlFree(ret); \
1782
+ return NULL; } \
1783
+
1784
+ if (uri->scheme) {
1785
+ segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1786
+ NULLCHK(segment)
1787
+ ret = xmlStrcat(ret, segment);
1788
+ ret = xmlStrcat(ret, BAD_CAST ":");
1789
+ xmlFree(segment);
1790
+ }
1791
+
1792
+ if (uri->authority) {
1793
+ segment =
1794
+ xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1795
+ NULLCHK(segment)
1796
+ ret = xmlStrcat(ret, BAD_CAST "//");
1797
+ ret = xmlStrcat(ret, segment);
1798
+ xmlFree(segment);
1799
+ }
1800
+
1801
+ if (uri->user) {
1802
+ segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1803
+ NULLCHK(segment)
1804
+ ret = xmlStrcat(ret,BAD_CAST "//");
1805
+ ret = xmlStrcat(ret, segment);
1806
+ ret = xmlStrcat(ret, BAD_CAST "@");
1807
+ xmlFree(segment);
1808
+ }
1809
+
1810
+ if (uri->server) {
1811
+ segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1812
+ NULLCHK(segment)
1813
+ if (uri->user == NULL)
1814
+ ret = xmlStrcat(ret, BAD_CAST "//");
1815
+ ret = xmlStrcat(ret, segment);
1816
+ xmlFree(segment);
1817
+ }
1818
+
1819
+ if (uri->port) {
1820
+ xmlChar port[10];
1821
+
1822
+ snprintf((char *) port, 10, "%d", uri->port);
1823
+ ret = xmlStrcat(ret, BAD_CAST ":");
1824
+ ret = xmlStrcat(ret, port);
1825
+ }
1826
+
1827
+ if (uri->path) {
1828
+ segment =
1829
+ xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1830
+ NULLCHK(segment)
1831
+ ret = xmlStrcat(ret, segment);
1832
+ xmlFree(segment);
1833
+ }
1834
+
1835
+ if (uri->query_raw) {
1836
+ ret = xmlStrcat(ret, BAD_CAST "?");
1837
+ ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1838
+ }
1839
+ else if (uri->query) {
1840
+ segment =
1841
+ xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1842
+ NULLCHK(segment)
1843
+ ret = xmlStrcat(ret, BAD_CAST "?");
1844
+ ret = xmlStrcat(ret, segment);
1845
+ xmlFree(segment);
1846
+ }
1847
+
1848
+ if (uri->opaque) {
1849
+ segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1850
+ NULLCHK(segment)
1851
+ ret = xmlStrcat(ret, segment);
1852
+ xmlFree(segment);
1853
+ }
1854
+
1855
+ if (uri->fragment) {
1856
+ segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1857
+ NULLCHK(segment)
1858
+ ret = xmlStrcat(ret, BAD_CAST "#");
1859
+ ret = xmlStrcat(ret, segment);
1860
+ xmlFree(segment);
1861
+ }
1862
+
1863
+ xmlFreeURI(uri);
1864
+ #undef NULLCHK
1865
+
1866
+ return (ret);
1867
+ }
1868
+
1869
+ /************************************************************************
1870
+ * *
1871
+ * Public functions *
1872
+ * *
1873
+ ************************************************************************/
1874
+
1875
+ /**
1876
+ * xmlBuildURI:
1877
+ * @URI: the URI instance found in the document
1878
+ * @base: the base value
1879
+ *
1880
+ * Computes the final URI of the reference done by checking that
1881
+ * the given URI is valid, and building the final URI using the
1882
+ * base URI. This is processed according to section 5.2 of the
1883
+ * RFC 2396
1884
+ *
1885
+ * 5.2. Resolving Relative References to Absolute Form
1886
+ *
1887
+ * Returns a new URI string (to be freed by the caller) or NULL in case
1888
+ * of error.
1889
+ */
1890
+ xmlChar *
1891
+ xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1892
+ xmlChar *val = NULL;
1893
+ int ret, len, indx, cur, out;
1894
+ xmlURIPtr ref = NULL;
1895
+ xmlURIPtr bas = NULL;
1896
+ xmlURIPtr res = NULL;
1897
+
1898
+ /*
1899
+ * 1) The URI reference is parsed into the potential four components and
1900
+ * fragment identifier, as described in Section 4.3.
1901
+ *
1902
+ * NOTE that a completely empty URI is treated by modern browsers
1903
+ * as a reference to "." rather than as a synonym for the current
1904
+ * URI. Should we do that here?
1905
+ */
1906
+ if (URI == NULL)
1907
+ ret = -1;
1908
+ else {
1909
+ if (*URI) {
1910
+ ref = xmlCreateURI();
1911
+ if (ref == NULL)
1912
+ goto done;
1913
+ ret = xmlParseURIReference(ref, (const char *) URI);
1914
+ }
1915
+ else
1916
+ ret = 0;
1917
+ }
1918
+ if (ret != 0)
1919
+ goto done;
1920
+ if ((ref != NULL) && (ref->scheme != NULL)) {
1921
+ /*
1922
+ * The URI is absolute don't modify.
1923
+ */
1924
+ val = xmlStrdup(URI);
1925
+ goto done;
1926
+ }
1927
+ if (base == NULL)
1928
+ ret = -1;
1929
+ else {
1930
+ bas = xmlCreateURI();
1931
+ if (bas == NULL)
1932
+ goto done;
1933
+ ret = xmlParseURIReference(bas, (const char *) base);
1934
+ }
1935
+ if (ret != 0) {
1936
+ if (ref)
1937
+ val = xmlSaveUri(ref);
1938
+ goto done;
1939
+ }
1940
+ if (ref == NULL) {
1941
+ /*
1942
+ * the base fragment must be ignored
1943
+ */
1944
+ if (bas->fragment != NULL) {
1945
+ xmlFree(bas->fragment);
1946
+ bas->fragment = NULL;
1947
+ }
1948
+ val = xmlSaveUri(bas);
1949
+ goto done;
1950
+ }
1951
+
1952
+ /*
1953
+ * 2) If the path component is empty and the scheme, authority, and
1954
+ * query components are undefined, then it is a reference to the
1955
+ * current document and we are done. Otherwise, the reference URI's
1956
+ * query and fragment components are defined as found (or not found)
1957
+ * within the URI reference and not inherited from the base URI.
1958
+ *
1959
+ * NOTE that in modern browsers, the parsing differs from the above
1960
+ * in the following aspect: the query component is allowed to be
1961
+ * defined while still treating this as a reference to the current
1962
+ * document.
1963
+ */
1964
+ res = xmlCreateURI();
1965
+ if (res == NULL)
1966
+ goto done;
1967
+ if ((ref->scheme == NULL) && (ref->path == NULL) &&
1968
+ ((ref->authority == NULL) && (ref->server == NULL))) {
1969
+ if (bas->scheme != NULL)
1970
+ res->scheme = xmlMemStrdup(bas->scheme);
1971
+ if (bas->authority != NULL)
1972
+ res->authority = xmlMemStrdup(bas->authority);
1973
+ else if ((bas->server != NULL) || (bas->port == -1)) {
1974
+ if (bas->server != NULL)
1975
+ res->server = xmlMemStrdup(bas->server);
1976
+ if (bas->user != NULL)
1977
+ res->user = xmlMemStrdup(bas->user);
1978
+ res->port = bas->port;
1979
+ }
1980
+ if (bas->path != NULL)
1981
+ res->path = xmlMemStrdup(bas->path);
1982
+ if (ref->query_raw != NULL)
1983
+ res->query_raw = xmlMemStrdup (ref->query_raw);
1984
+ else if (ref->query != NULL)
1985
+ res->query = xmlMemStrdup(ref->query);
1986
+ else if (bas->query_raw != NULL)
1987
+ res->query_raw = xmlMemStrdup(bas->query_raw);
1988
+ else if (bas->query != NULL)
1989
+ res->query = xmlMemStrdup(bas->query);
1990
+ if (ref->fragment != NULL)
1991
+ res->fragment = xmlMemStrdup(ref->fragment);
1992
+ goto step_7;
1993
+ }
1994
+
1995
+ /*
1996
+ * 3) If the scheme component is defined, indicating that the reference
1997
+ * starts with a scheme name, then the reference is interpreted as an
1998
+ * absolute URI and we are done. Otherwise, the reference URI's
1999
+ * scheme is inherited from the base URI's scheme component.
2000
+ */
2001
+ if (ref->scheme != NULL) {
2002
+ val = xmlSaveUri(ref);
2003
+ goto done;
2004
+ }
2005
+ if (bas->scheme != NULL)
2006
+ res->scheme = xmlMemStrdup(bas->scheme);
2007
+
2008
+ if (ref->query_raw != NULL)
2009
+ res->query_raw = xmlMemStrdup(ref->query_raw);
2010
+ else if (ref->query != NULL)
2011
+ res->query = xmlMemStrdup(ref->query);
2012
+ if (ref->fragment != NULL)
2013
+ res->fragment = xmlMemStrdup(ref->fragment);
2014
+
2015
+ /*
2016
+ * 4) If the authority component is defined, then the reference is a
2017
+ * network-path and we skip to step 7. Otherwise, the reference
2018
+ * URI's authority is inherited from the base URI's authority
2019
+ * component, which will also be undefined if the URI scheme does not
2020
+ * use an authority component.
2021
+ */
2022
+ if ((ref->authority != NULL) || (ref->server != NULL)) {
2023
+ if (ref->authority != NULL)
2024
+ res->authority = xmlMemStrdup(ref->authority);
2025
+ else {
2026
+ res->server = xmlMemStrdup(ref->server);
2027
+ if (ref->user != NULL)
2028
+ res->user = xmlMemStrdup(ref->user);
2029
+ res->port = ref->port;
2030
+ }
2031
+ if (ref->path != NULL)
2032
+ res->path = xmlMemStrdup(ref->path);
2033
+ goto step_7;
2034
+ }
2035
+ if (bas->authority != NULL)
2036
+ res->authority = xmlMemStrdup(bas->authority);
2037
+ else if ((bas->server != NULL) || (bas->port == -1)) {
2038
+ if (bas->server != NULL)
2039
+ res->server = xmlMemStrdup(bas->server);
2040
+ if (bas->user != NULL)
2041
+ res->user = xmlMemStrdup(bas->user);
2042
+ res->port = bas->port;
2043
+ }
2044
+
2045
+ /*
2046
+ * 5) If the path component begins with a slash character ("/"), then
2047
+ * the reference is an absolute-path and we skip to step 7.
2048
+ */
2049
+ if ((ref->path != NULL) && (ref->path[0] == '/')) {
2050
+ res->path = xmlMemStrdup(ref->path);
2051
+ goto step_7;
2052
+ }
2053
+
2054
+
2055
+ /*
2056
+ * 6) If this step is reached, then we are resolving a relative-path
2057
+ * reference. The relative path needs to be merged with the base
2058
+ * URI's path. Although there are many ways to do this, we will
2059
+ * describe a simple method using a separate string buffer.
2060
+ *
2061
+ * Allocate a buffer large enough for the result string.
2062
+ */
2063
+ len = 2; /* extra / and 0 */
2064
+ if (ref->path != NULL)
2065
+ len += strlen(ref->path);
2066
+ if (bas->path != NULL)
2067
+ len += strlen(bas->path);
2068
+ res->path = (char *) xmlMallocAtomic(len);
2069
+ if (res->path == NULL) {
2070
+ xmlURIErrMemory("resolving URI against base\n");
2071
+ goto done;
2072
+ }
2073
+ res->path[0] = 0;
2074
+
2075
+ /*
2076
+ * a) All but the last segment of the base URI's path component is
2077
+ * copied to the buffer. In other words, any characters after the
2078
+ * last (right-most) slash character, if any, are excluded.
2079
+ */
2080
+ cur = 0;
2081
+ out = 0;
2082
+ if (bas->path != NULL) {
2083
+ while (bas->path[cur] != 0) {
2084
+ while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2085
+ cur++;
2086
+ if (bas->path[cur] == 0)
2087
+ break;
2088
+
2089
+ cur++;
2090
+ while (out < cur) {
2091
+ res->path[out] = bas->path[out];
2092
+ out++;
2093
+ }
2094
+ }
2095
+ }
2096
+ res->path[out] = 0;
2097
+
2098
+ /*
2099
+ * b) The reference's path component is appended to the buffer
2100
+ * string.
2101
+ */
2102
+ if (ref->path != NULL && ref->path[0] != 0) {
2103
+ indx = 0;
2104
+ /*
2105
+ * Ensure the path includes a '/'
2106
+ */
2107
+ if ((out == 0) && (bas->server != NULL))
2108
+ res->path[out++] = '/';
2109
+ while (ref->path[indx] != 0) {
2110
+ res->path[out++] = ref->path[indx++];
2111
+ }
2112
+ }
2113
+ res->path[out] = 0;
2114
+
2115
+ /*
2116
+ * Steps c) to h) are really path normalization steps
2117
+ */
2118
+ xmlNormalizeURIPath(res->path);
2119
+
2120
+ step_7:
2121
+
2122
+ /*
2123
+ * 7) The resulting URI components, including any inherited from the
2124
+ * base URI, are recombined to give the absolute form of the URI
2125
+ * reference.
2126
+ */
2127
+ val = xmlSaveUri(res);
2128
+
2129
+ done:
2130
+ if (ref != NULL)
2131
+ xmlFreeURI(ref);
2132
+ if (bas != NULL)
2133
+ xmlFreeURI(bas);
2134
+ if (res != NULL)
2135
+ xmlFreeURI(res);
2136
+ return(val);
2137
+ }
2138
+
2139
+ /**
2140
+ * xmlBuildRelativeURI:
2141
+ * @URI: the URI reference under consideration
2142
+ * @base: the base value
2143
+ *
2144
+ * Expresses the URI of the reference in terms relative to the
2145
+ * base. Some examples of this operation include:
2146
+ * base = "http://site1.com/docs/book1.html"
2147
+ * URI input URI returned
2148
+ * docs/pic1.gif pic1.gif
2149
+ * docs/img/pic1.gif img/pic1.gif
2150
+ * img/pic1.gif ../img/pic1.gif
2151
+ * http://site1.com/docs/pic1.gif pic1.gif
2152
+ * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2153
+ *
2154
+ * base = "docs/book1.html"
2155
+ * URI input URI returned
2156
+ * docs/pic1.gif pic1.gif
2157
+ * docs/img/pic1.gif img/pic1.gif
2158
+ * img/pic1.gif ../img/pic1.gif
2159
+ * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2160
+ *
2161
+ *
2162
+ * Note: if the URI reference is really weird or complicated, it may be
2163
+ * worthwhile to first convert it into a "nice" one by calling
2164
+ * xmlBuildURI (using 'base') before calling this routine,
2165
+ * since this routine (for reasonable efficiency) assumes URI has
2166
+ * already been through some validation.
2167
+ *
2168
+ * Returns a new URI string (to be freed by the caller) or NULL in case
2169
+ * error.
2170
+ */
2171
+ xmlChar *
2172
+ xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2173
+ {
2174
+ xmlChar *val = NULL;
2175
+ int ret;
2176
+ int ix;
2177
+ int nbslash = 0;
2178
+ int len;
2179
+ xmlURIPtr ref = NULL;
2180
+ xmlURIPtr bas = NULL;
2181
+ xmlChar *bptr, *uptr, *vptr;
2182
+ int remove_path = 0;
2183
+
2184
+ if ((URI == NULL) || (*URI == 0))
2185
+ return NULL;
2186
+
2187
+ /*
2188
+ * First parse URI into a standard form
2189
+ */
2190
+ ref = xmlCreateURI ();
2191
+ if (ref == NULL)
2192
+ return NULL;
2193
+ /* If URI not already in "relative" form */
2194
+ if (URI[0] != '.') {
2195
+ ret = xmlParseURIReference (ref, (const char *) URI);
2196
+ if (ret != 0)
2197
+ goto done; /* Error in URI, return NULL */
2198
+ } else
2199
+ ref->path = (char *)xmlStrdup(URI);
2200
+
2201
+ /*
2202
+ * Next parse base into the same standard form
2203
+ */
2204
+ if ((base == NULL) || (*base == 0)) {
2205
+ val = xmlStrdup (URI);
2206
+ goto done;
2207
+ }
2208
+ bas = xmlCreateURI ();
2209
+ if (bas == NULL)
2210
+ goto done;
2211
+ if (base[0] != '.') {
2212
+ ret = xmlParseURIReference (bas, (const char *) base);
2213
+ if (ret != 0)
2214
+ goto done; /* Error in base, return NULL */
2215
+ } else
2216
+ bas->path = (char *)xmlStrdup(base);
2217
+
2218
+ /*
2219
+ * If the scheme / server on the URI differs from the base,
2220
+ * just return the URI
2221
+ */
2222
+ if ((ref->scheme != NULL) &&
2223
+ ((bas->scheme == NULL) ||
2224
+ (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2225
+ (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2226
+ val = xmlStrdup (URI);
2227
+ goto done;
2228
+ }
2229
+ if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2230
+ val = xmlStrdup(BAD_CAST "");
2231
+ goto done;
2232
+ }
2233
+ if (bas->path == NULL) {
2234
+ val = xmlStrdup((xmlChar *)ref->path);
2235
+ goto done;
2236
+ }
2237
+ if (ref->path == NULL) {
2238
+ ref->path = (char *) "/";
2239
+ remove_path = 1;
2240
+ }
2241
+
2242
+ /*
2243
+ * At this point (at last!) we can compare the two paths
2244
+ *
2245
+ * First we take care of the special case where either of the
2246
+ * two path components may be missing (bug 316224)
2247
+ */
2248
+ bptr = (xmlChar *)bas->path;
2249
+ {
2250
+ xmlChar *rptr = (xmlChar *) ref->path;
2251
+ int pos = 0;
2252
+
2253
+ /*
2254
+ * Next we compare the two strings and find where they first differ
2255
+ */
2256
+ if ((*rptr == '.') && (rptr[1] == '/'))
2257
+ rptr += 2;
2258
+ if ((*bptr == '.') && (bptr[1] == '/'))
2259
+ bptr += 2;
2260
+ else if ((*bptr == '/') && (*rptr != '/'))
2261
+ bptr++;
2262
+ while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2263
+ pos++;
2264
+
2265
+ if (bptr[pos] == rptr[pos]) {
2266
+ val = xmlStrdup(BAD_CAST "");
2267
+ goto done; /* (I can't imagine why anyone would do this) */
2268
+ }
2269
+
2270
+ /*
2271
+ * In URI, "back up" to the last '/' encountered. This will be the
2272
+ * beginning of the "unique" suffix of URI
2273
+ */
2274
+ ix = pos;
2275
+ for (; ix > 0; ix--) {
2276
+ if (rptr[ix - 1] == '/')
2277
+ break;
2278
+ }
2279
+ uptr = (xmlChar *)&rptr[ix];
2280
+
2281
+ /*
2282
+ * In base, count the number of '/' from the differing point
2283
+ */
2284
+ for (; bptr[ix] != 0; ix++) {
2285
+ if (bptr[ix] == '/')
2286
+ nbslash++;
2287
+ }
2288
+
2289
+ /*
2290
+ * e.g: URI="foo/" base="foo/bar" -> "./"
2291
+ */
2292
+ if (nbslash == 0 && !uptr[0]) {
2293
+ val = xmlStrdup(BAD_CAST "./");
2294
+ goto done;
2295
+ }
2296
+
2297
+ len = xmlStrlen (uptr) + 1;
2298
+ }
2299
+
2300
+ if (nbslash == 0) {
2301
+ if (uptr != NULL)
2302
+ /* exception characters from xmlSaveUri */
2303
+ val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2304
+ goto done;
2305
+ }
2306
+
2307
+ /*
2308
+ * Allocate just enough space for the returned string -
2309
+ * length of the remainder of the URI, plus enough space
2310
+ * for the "../" groups, plus one for the terminator
2311
+ */
2312
+ val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2313
+ if (val == NULL) {
2314
+ xmlURIErrMemory("building relative URI\n");
2315
+ goto done;
2316
+ }
2317
+ vptr = val;
2318
+ /*
2319
+ * Put in as many "../" as needed
2320
+ */
2321
+ for (; nbslash>0; nbslash--) {
2322
+ *vptr++ = '.';
2323
+ *vptr++ = '.';
2324
+ *vptr++ = '/';
2325
+ }
2326
+ /*
2327
+ * Finish up with the end of the URI
2328
+ */
2329
+ if (uptr != NULL) {
2330
+ if ((vptr > val) && (len > 0) &&
2331
+ (uptr[0] == '/') && (vptr[-1] == '/')) {
2332
+ memcpy (vptr, uptr + 1, len - 1);
2333
+ vptr[len - 2] = 0;
2334
+ } else {
2335
+ memcpy (vptr, uptr, len);
2336
+ vptr[len - 1] = 0;
2337
+ }
2338
+ } else {
2339
+ vptr[len - 1] = 0;
2340
+ }
2341
+
2342
+ /* escape the freshly-built path */
2343
+ vptr = val;
2344
+ /* exception characters from xmlSaveUri */
2345
+ val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2346
+ xmlFree(vptr);
2347
+
2348
+ done:
2349
+ /*
2350
+ * Free the working variables
2351
+ */
2352
+ if (remove_path != 0)
2353
+ ref->path = NULL;
2354
+ if (ref != NULL)
2355
+ xmlFreeURI (ref);
2356
+ if (bas != NULL)
2357
+ xmlFreeURI (bas);
2358
+
2359
+ return val;
2360
+ }
2361
+
2362
+ /**
2363
+ * xmlCanonicPath:
2364
+ * @path: the resource locator in a filesystem notation
2365
+ *
2366
+ * Constructs a canonic path from the specified path.
2367
+ *
2368
+ * Returns a new canonic path, or a duplicate of the path parameter if the
2369
+ * construction fails. The caller is responsible for freeing the memory occupied
2370
+ * by the returned string. If there is insufficient memory available, or the
2371
+ * argument is NULL, the function returns NULL.
2372
+ */
2373
+ #define IS_WINDOWS_PATH(p) \
2374
+ ((p != NULL) && \
2375
+ (((p[0] >= 'a') && (p[0] <= 'z')) || \
2376
+ ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2377
+ (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2378
+ xmlChar *
2379
+ xmlCanonicPath(const xmlChar *path)
2380
+ {
2381
+ /*
2382
+ * For Windows implementations, additional work needs to be done to
2383
+ * replace backslashes in pathnames with "forward slashes"
2384
+ */
2385
+ #if defined(_WIN32) && !defined(__CYGWIN__)
2386
+ int len = 0;
2387
+ char *p = NULL;
2388
+ #endif
2389
+ xmlURIPtr uri;
2390
+ xmlChar *ret;
2391
+ const xmlChar *absuri;
2392
+
2393
+ if (path == NULL)
2394
+ return(NULL);
2395
+
2396
+ #if defined(_WIN32)
2397
+ /*
2398
+ * We must not change the backslashes to slashes if the the path
2399
+ * starts with \\?\
2400
+ * Those paths can be up to 32k characters long.
2401
+ * Was added specifically for OpenOffice, those paths can't be converted
2402
+ * to URIs anyway.
2403
+ */
2404
+ if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2405
+ (path[3] == '\\') )
2406
+ return xmlStrdup((const xmlChar *) path);
2407
+ #endif
2408
+
2409
+ /* sanitize filename starting with // so it can be used as URI */
2410
+ if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2411
+ path++;
2412
+
2413
+ if ((uri = xmlParseURI((const char *) path)) != NULL) {
2414
+ xmlFreeURI(uri);
2415
+ return xmlStrdup(path);
2416
+ }
2417
+
2418
+ /* Check if this is an "absolute uri" */
2419
+ absuri = xmlStrstr(path, BAD_CAST "://");
2420
+ if (absuri != NULL) {
2421
+ int l, j;
2422
+ unsigned char c;
2423
+ xmlChar *escURI;
2424
+
2425
+ /*
2426
+ * this looks like an URI where some parts have not been
2427
+ * escaped leading to a parsing problem. Check that the first
2428
+ * part matches a protocol.
2429
+ */
2430
+ l = absuri - path;
2431
+ /* Bypass if first part (part before the '://') is > 20 chars */
2432
+ if ((l <= 0) || (l > 20))
2433
+ goto path_processing;
2434
+ /* Bypass if any non-alpha characters are present in first part */
2435
+ for (j = 0;j < l;j++) {
2436
+ c = path[j];
2437
+ if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2438
+ goto path_processing;
2439
+ }
2440
+
2441
+ /* Escape all except the characters specified in the supplied path */
2442
+ escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2443
+ if (escURI != NULL) {
2444
+ /* Try parsing the escaped path */
2445
+ uri = xmlParseURI((const char *) escURI);
2446
+ /* If successful, return the escaped string */
2447
+ if (uri != NULL) {
2448
+ xmlFreeURI(uri);
2449
+ return escURI;
2450
+ }
2451
+ xmlFree(escURI);
2452
+ }
2453
+ }
2454
+
2455
+ path_processing:
2456
+ /* For Windows implementations, replace backslashes with 'forward slashes' */
2457
+ #if defined(_WIN32) && !defined(__CYGWIN__)
2458
+ /*
2459
+ * Create a URI structure
2460
+ */
2461
+ uri = xmlCreateURI();
2462
+ if (uri == NULL) { /* Guard against 'out of memory' */
2463
+ return(NULL);
2464
+ }
2465
+
2466
+ len = xmlStrlen(path);
2467
+ if ((len > 2) && IS_WINDOWS_PATH(path)) {
2468
+ /* make the scheme 'file' */
2469
+ uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2470
+ /* allocate space for leading '/' + path + string terminator */
2471
+ uri->path = xmlMallocAtomic(len + 2);
2472
+ if (uri->path == NULL) {
2473
+ xmlFreeURI(uri); /* Guard against 'out of memory' */
2474
+ return(NULL);
2475
+ }
2476
+ /* Put in leading '/' plus path */
2477
+ uri->path[0] = '/';
2478
+ p = uri->path + 1;
2479
+ strncpy(p, (char *) path, len + 1);
2480
+ } else {
2481
+ uri->path = (char *) xmlStrdup(path);
2482
+ if (uri->path == NULL) {
2483
+ xmlFreeURI(uri);
2484
+ return(NULL);
2485
+ }
2486
+ p = uri->path;
2487
+ }
2488
+ /* Now change all occurrences of '\' to '/' */
2489
+ while (*p != '\0') {
2490
+ if (*p == '\\')
2491
+ *p = '/';
2492
+ p++;
2493
+ }
2494
+
2495
+ if (uri->scheme == NULL) {
2496
+ ret = xmlStrdup((const xmlChar *) uri->path);
2497
+ } else {
2498
+ ret = xmlSaveUri(uri);
2499
+ }
2500
+
2501
+ xmlFreeURI(uri);
2502
+ #else
2503
+ ret = xmlStrdup((const xmlChar *) path);
2504
+ #endif
2505
+ return(ret);
2506
+ }
2507
+
2508
+ /**
2509
+ * xmlPathToURI:
2510
+ * @path: the resource locator in a filesystem notation
2511
+ *
2512
+ * Constructs an URI expressing the existing path
2513
+ *
2514
+ * Returns a new URI, or a duplicate of the path parameter if the
2515
+ * construction fails. The caller is responsible for freeing the memory
2516
+ * occupied by the returned string. If there is insufficient memory available,
2517
+ * or the argument is NULL, the function returns NULL.
2518
+ */
2519
+ xmlChar *
2520
+ xmlPathToURI(const xmlChar *path)
2521
+ {
2522
+ xmlURIPtr uri;
2523
+ xmlURI temp;
2524
+ xmlChar *ret, *cal;
2525
+
2526
+ if (path == NULL)
2527
+ return(NULL);
2528
+
2529
+ if ((uri = xmlParseURI((const char *) path)) != NULL) {
2530
+ xmlFreeURI(uri);
2531
+ return xmlStrdup(path);
2532
+ }
2533
+ cal = xmlCanonicPath(path);
2534
+ if (cal == NULL)
2535
+ return(NULL);
2536
+ #if defined(_WIN32) && !defined(__CYGWIN__)
2537
+ /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2538
+ If 'cal' is a valid URI already then we are done here, as continuing would make
2539
+ it invalid. */
2540
+ if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2541
+ xmlFreeURI(uri);
2542
+ return cal;
2543
+ }
2544
+ /* 'cal' can contain a relative path with backslashes. If that is processed
2545
+ by xmlSaveURI, they will be escaped and the external entity loader machinery
2546
+ will fail. So convert them to slashes. Misuse 'ret' for walking. */
2547
+ ret = cal;
2548
+ while (*ret != '\0') {
2549
+ if (*ret == '\\')
2550
+ *ret = '/';
2551
+ ret++;
2552
+ }
2553
+ #endif
2554
+ memset(&temp, 0, sizeof(temp));
2555
+ temp.path = (char *) cal;
2556
+ ret = xmlSaveUri(&temp);
2557
+ xmlFree(cal);
2558
+ return(ret);
2559
+ }
2560
+ #define bottom_uri
2561
+ #include "elfgcchack.h"