nokolexbor 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486)
  1. checksums.yaml +7 -0
  2. data/ext/nokolexbor/config.h +186 -0
  3. data/ext/nokolexbor/extconf.rb +131 -0
  4. data/ext/nokolexbor/libxml/HTMLparser.h +320 -0
  5. data/ext/nokolexbor/libxml/SAX2.h +173 -0
  6. data/ext/nokolexbor/libxml/chvalid.h +230 -0
  7. data/ext/nokolexbor/libxml/debugXML.h +217 -0
  8. data/ext/nokolexbor/libxml/dict.h +81 -0
  9. data/ext/nokolexbor/libxml/encoding.h +232 -0
  10. data/ext/nokolexbor/libxml/entities.h +153 -0
  11. data/ext/nokolexbor/libxml/globals.h +529 -0
  12. data/ext/nokolexbor/libxml/hash.h +236 -0
  13. data/ext/nokolexbor/libxml/list.h +137 -0
  14. data/ext/nokolexbor/libxml/parser.h +1264 -0
  15. data/ext/nokolexbor/libxml/parserInternals.h +641 -0
  16. data/ext/nokolexbor/libxml/pattern.h +100 -0
  17. data/ext/nokolexbor/libxml/threads.h +94 -0
  18. data/ext/nokolexbor/libxml/tree.h +1315 -0
  19. data/ext/nokolexbor/libxml/uri.h +94 -0
  20. data/ext/nokolexbor/libxml/valid.h +448 -0
  21. data/ext/nokolexbor/libxml/xmlIO.h +369 -0
  22. data/ext/nokolexbor/libxml/xmlautomata.h +146 -0
  23. data/ext/nokolexbor/libxml/xmlerror.h +919 -0
  24. data/ext/nokolexbor/libxml/xmlexports.h +79 -0
  25. data/ext/nokolexbor/libxml/xmlmemory.h +226 -0
  26. data/ext/nokolexbor/libxml/xmlregexp.h +222 -0
  27. data/ext/nokolexbor/libxml/xmlstring.h +140 -0
  28. data/ext/nokolexbor/libxml/xmlversion.h +526 -0
  29. data/ext/nokolexbor/libxml/xpath.h +575 -0
  30. data/ext/nokolexbor/libxml/xpathInternals.h +632 -0
  31. data/ext/nokolexbor/libxml/xpointer.h +137 -0
  32. data/ext/nokolexbor/libxml.h +76 -0
  33. data/ext/nokolexbor/memory.c +39 -0
  34. data/ext/nokolexbor/nl_document.c +51 -0
  35. data/ext/nokolexbor/nl_node.c +790 -0
  36. data/ext/nokolexbor/nl_node_set.c +368 -0
  37. data/ext/nokolexbor/nl_xpath_context.c +200 -0
  38. data/ext/nokolexbor/nokolexbor.c +63 -0
  39. data/ext/nokolexbor/nokolexbor.h +37 -0
  40. data/ext/nokolexbor/private/buf.h +70 -0
  41. data/ext/nokolexbor/private/dict.h +11 -0
  42. data/ext/nokolexbor/private/enc.h +17 -0
  43. data/ext/nokolexbor/private/error.h +21 -0
  44. data/ext/nokolexbor/private/globals.h +9 -0
  45. data/ext/nokolexbor/private/memory.h +9 -0
  46. data/ext/nokolexbor/private/parser.h +27 -0
  47. data/ext/nokolexbor/private/string.h +9 -0
  48. data/ext/nokolexbor/private/threads.h +50 -0
  49. data/ext/nokolexbor/private/tree.h +18 -0
  50. data/ext/nokolexbor/private/xpath.h +7 -0
  51. data/ext/nokolexbor/timsort.h +601 -0
  52. data/ext/nokolexbor/xml_SAX2.c +80 -0
  53. data/ext/nokolexbor/xml_buf.c +363 -0
  54. data/ext/nokolexbor/xml_chvalid.c +334 -0
  55. data/ext/nokolexbor/xml_dict.c +1264 -0
  56. data/ext/nokolexbor/xml_encoding.c +124 -0
  57. data/ext/nokolexbor/xml_error.c +134 -0
  58. data/ext/nokolexbor/xml_globals.c +1085 -0
  59. data/ext/nokolexbor/xml_hash.c +1141 -0
  60. data/ext/nokolexbor/xml_memory.c +203 -0
  61. data/ext/nokolexbor/xml_parser.c +127 -0
  62. data/ext/nokolexbor/xml_parserInternals.c +338 -0
  63. data/ext/nokolexbor/xml_pattern.c +2375 -0
  64. data/ext/nokolexbor/xml_string.c +1051 -0
  65. data/ext/nokolexbor/xml_threads.c +881 -0
  66. data/ext/nokolexbor/xml_tree.c +148 -0
  67. data/ext/nokolexbor/xml_xpath.c +14743 -0
  68. data/lib/nokolexbor/attribute.rb +18 -0
  69. data/lib/nokolexbor/document.rb +6 -0
  70. data/lib/nokolexbor/node.rb +264 -0
  71. data/lib/nokolexbor/node_set.rb +124 -0
  72. data/lib/nokolexbor/version.rb +5 -0
  73. data/lib/nokolexbor/xpath_context.rb +14 -0
  74. data/lib/nokolexbor.rb +17 -0
  75. data/patches/0001-lexbor-support-text-pseudo-element.patch +137 -0
  76. data/patches/0002-lexbor-match-id-class-case-sensitive.patch +22 -0
  77. data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
  78. data/vendor/lexbor/CMakeLists.txt +331 -0
  79. data/vendor/lexbor/config.cmake +890 -0
  80. data/vendor/lexbor/feature.cmake +134 -0
  81. data/vendor/lexbor/source/lexbor/core/array.c +208 -0
  82. data/vendor/lexbor/source/lexbor/core/array.h +100 -0
  83. data/vendor/lexbor/source/lexbor/core/array_obj.c +216 -0
  84. data/vendor/lexbor/source/lexbor/core/array_obj.h +134 -0
  85. data/vendor/lexbor/source/lexbor/core/avl.c +442 -0
  86. data/vendor/lexbor/source/lexbor/core/avl.h +82 -0
  87. data/vendor/lexbor/source/lexbor/core/base.h +86 -0
  88. data/vendor/lexbor/source/lexbor/core/bst.c +468 -0
  89. data/vendor/lexbor/source/lexbor/core/bst.h +108 -0
  90. data/vendor/lexbor/source/lexbor/core/bst_map.c +238 -0
  91. data/vendor/lexbor/source/lexbor/core/bst_map.h +87 -0
  92. data/vendor/lexbor/source/lexbor/core/config.cmake +12 -0
  93. data/vendor/lexbor/source/lexbor/core/conv.c +203 -0
  94. data/vendor/lexbor/source/lexbor/core/conv.h +53 -0
  95. data/vendor/lexbor/source/lexbor/core/core.h +35 -0
  96. data/vendor/lexbor/source/lexbor/core/def.h +57 -0
  97. data/vendor/lexbor/source/lexbor/core/diyfp.c +153 -0
  98. data/vendor/lexbor/source/lexbor/core/diyfp.h +258 -0
  99. data/vendor/lexbor/source/lexbor/core/dobject.c +187 -0
  100. data/vendor/lexbor/source/lexbor/core/dobject.h +92 -0
  101. data/vendor/lexbor/source/lexbor/core/dtoa.c +404 -0
  102. data/vendor/lexbor/source/lexbor/core/dtoa.h +28 -0
  103. data/vendor/lexbor/source/lexbor/core/fs.h +60 -0
  104. data/vendor/lexbor/source/lexbor/core/hash.c +476 -0
  105. data/vendor/lexbor/source/lexbor/core/hash.h +218 -0
  106. data/vendor/lexbor/source/lexbor/core/in.c +267 -0
  107. data/vendor/lexbor/source/lexbor/core/in.h +172 -0
  108. data/vendor/lexbor/source/lexbor/core/lexbor.h +35 -0
  109. data/vendor/lexbor/source/lexbor/core/mem.c +228 -0
  110. data/vendor/lexbor/source/lexbor/core/mem.h +141 -0
  111. data/vendor/lexbor/source/lexbor/core/mraw.c +428 -0
  112. data/vendor/lexbor/source/lexbor/core/mraw.h +114 -0
  113. data/vendor/lexbor/source/lexbor/core/perf.h +45 -0
  114. data/vendor/lexbor/source/lexbor/core/plog.c +73 -0
  115. data/vendor/lexbor/source/lexbor/core/plog.h +102 -0
  116. data/vendor/lexbor/source/lexbor/core/print.c +168 -0
  117. data/vendor/lexbor/source/lexbor/core/print.h +39 -0
  118. data/vendor/lexbor/source/lexbor/core/sbst.h +59 -0
  119. data/vendor/lexbor/source/lexbor/core/serialize.c +27 -0
  120. data/vendor/lexbor/source/lexbor/core/serialize.h +32 -0
  121. data/vendor/lexbor/source/lexbor/core/shs.c +118 -0
  122. data/vendor/lexbor/source/lexbor/core/shs.h +82 -0
  123. data/vendor/lexbor/source/lexbor/core/str.c +617 -0
  124. data/vendor/lexbor/source/lexbor/core/str.h +247 -0
  125. data/vendor/lexbor/source/lexbor/core/str_res.h +369 -0
  126. data/vendor/lexbor/source/lexbor/core/strtod.c +326 -0
  127. data/vendor/lexbor/source/lexbor/core/strtod.h +28 -0
  128. data/vendor/lexbor/source/lexbor/core/types.h +39 -0
  129. data/vendor/lexbor/source/lexbor/core/utils.c +43 -0
  130. data/vendor/lexbor/source/lexbor/core/utils.h +36 -0
  131. data/vendor/lexbor/source/lexbor/css/base.h +44 -0
  132. data/vendor/lexbor/source/lexbor/css/config.cmake +2 -0
  133. data/vendor/lexbor/source/lexbor/css/css.h +25 -0
  134. data/vendor/lexbor/source/lexbor/css/log.c +336 -0
  135. data/vendor/lexbor/source/lexbor/css/log.h +103 -0
  136. data/vendor/lexbor/source/lexbor/css/node.h +29 -0
  137. data/vendor/lexbor/source/lexbor/css/parser.c +473 -0
  138. data/vendor/lexbor/source/lexbor/css/parser.h +368 -0
  139. data/vendor/lexbor/source/lexbor/css/selectors/base.h +48 -0
  140. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.c +91 -0
  141. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.h +66 -0
  142. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_const.h +109 -0
  143. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_res.h +302 -0
  144. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +279 -0
  145. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.h +85 -0
  146. data/vendor/lexbor/source/lexbor/css/selectors/selector.c +927 -0
  147. data/vendor/lexbor/source/lexbor/css/selectors/selector.h +200 -0
  148. data/vendor/lexbor/source/lexbor/css/selectors/selectors.c +340 -0
  149. data/vendor/lexbor/source/lexbor/css/selectors/selectors.h +137 -0
  150. data/vendor/lexbor/source/lexbor/css/selectors/state.c +1718 -0
  151. data/vendor/lexbor/source/lexbor/css/selectors/state.h +79 -0
  152. data/vendor/lexbor/source/lexbor/css/stylesheet.h +37 -0
  153. data/vendor/lexbor/source/lexbor/css/syntax/anb.c +443 -0
  154. data/vendor/lexbor/source/lexbor/css/syntax/anb.h +45 -0
  155. data/vendor/lexbor/source/lexbor/css/syntax/base.h +33 -0
  156. data/vendor/lexbor/source/lexbor/css/syntax/parser.c +9 -0
  157. data/vendor/lexbor/source/lexbor/css/syntax/parser.h +25 -0
  158. data/vendor/lexbor/source/lexbor/css/syntax/res.h +48 -0
  159. data/vendor/lexbor/source/lexbor/css/syntax/state.c +2603 -0
  160. data/vendor/lexbor/source/lexbor/css/syntax/state.h +140 -0
  161. data/vendor/lexbor/source/lexbor/css/syntax/state_res.h +273 -0
  162. data/vendor/lexbor/source/lexbor/css/syntax/syntax.c +67 -0
  163. data/vendor/lexbor/source/lexbor/css/syntax/token.c +618 -0
  164. data/vendor/lexbor/source/lexbor/css/syntax/token.h +298 -0
  165. data/vendor/lexbor/source/lexbor/css/syntax/token_res.h +68 -0
  166. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.c +30 -0
  167. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.h +58 -0
  168. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.c +278 -0
  169. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.h +121 -0
  170. data/vendor/lexbor/source/lexbor/dom/base.h +32 -0
  171. data/vendor/lexbor/source/lexbor/dom/collection.c +97 -0
  172. data/vendor/lexbor/source/lexbor/dom/collection.h +112 -0
  173. data/vendor/lexbor/source/lexbor/dom/config.cmake +3 -0
  174. data/vendor/lexbor/source/lexbor/dom/dom.h +29 -0
  175. data/vendor/lexbor/source/lexbor/dom/exception.c +18 -0
  176. data/vendor/lexbor/source/lexbor/dom/exception.h +73 -0
  177. data/vendor/lexbor/source/lexbor/dom/interface.c +110 -0
  178. data/vendor/lexbor/source/lexbor/dom/interface.h +88 -0
  179. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.c +445 -0
  180. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.h +152 -0
  181. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_const.h +62 -0
  182. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_res.h +143 -0
  183. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.c +55 -0
  184. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.h +38 -0
  185. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.c +110 -0
  186. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.h +51 -0
  187. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.c +64 -0
  188. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.h +42 -0
  189. data/vendor/lexbor/source/lexbor/dom/interfaces/document.c +536 -0
  190. data/vendor/lexbor/source/lexbor/dom/interfaces/document.h +243 -0
  191. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.c +36 -0
  192. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.h +36 -0
  193. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.c +125 -0
  194. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.h +108 -0
  195. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +1411 -0
  196. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +319 -0
  197. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.c +32 -0
  198. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.h +34 -0
  199. data/vendor/lexbor/source/lexbor/dom/interfaces/node.c +661 -0
  200. data/vendor/lexbor/source/lexbor/dom/interfaces/node.h +192 -0
  201. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.c +87 -0
  202. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.h +66 -0
  203. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.c +36 -0
  204. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h +44 -0
  205. data/vendor/lexbor/source/lexbor/dom/interfaces/text.c +63 -0
  206. data/vendor/lexbor/source/lexbor/dom/interfaces/text.h +42 -0
  207. data/vendor/lexbor/source/lexbor/encoding/base.h +218 -0
  208. data/vendor/lexbor/source/lexbor/encoding/big5.c +42839 -0
  209. data/vendor/lexbor/source/lexbor/encoding/config.cmake +12 -0
  210. data/vendor/lexbor/source/lexbor/encoding/const.h +65 -0
  211. data/vendor/lexbor/source/lexbor/encoding/decode.c +3193 -0
  212. data/vendor/lexbor/source/lexbor/encoding/decode.h +370 -0
  213. data/vendor/lexbor/source/lexbor/encoding/encode.c +1931 -0
  214. data/vendor/lexbor/source/lexbor/encoding/encode.h +377 -0
  215. data/vendor/lexbor/source/lexbor/encoding/encoding.c +252 -0
  216. data/vendor/lexbor/source/lexbor/encoding/encoding.h +475 -0
  217. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +53883 -0
  218. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +47905 -0
  219. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +159 -0
  220. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +22477 -0
  221. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +15787 -0
  222. data/vendor/lexbor/source/lexbor/encoding/multi.h +53 -0
  223. data/vendor/lexbor/source/lexbor/encoding/range.c +71 -0
  224. data/vendor/lexbor/source/lexbor/encoding/range.h +34 -0
  225. data/vendor/lexbor/source/lexbor/encoding/res.c +222 -0
  226. data/vendor/lexbor/source/lexbor/encoding/res.h +34 -0
  227. data/vendor/lexbor/source/lexbor/encoding/single.c +13748 -0
  228. data/vendor/lexbor/source/lexbor/encoding/single.h +116 -0
  229. data/vendor/lexbor/source/lexbor/html/base.h +44 -0
  230. data/vendor/lexbor/source/lexbor/html/config.cmake +3 -0
  231. data/vendor/lexbor/source/lexbor/html/encoding.c +574 -0
  232. data/vendor/lexbor/source/lexbor/html/encoding.h +106 -0
  233. data/vendor/lexbor/source/lexbor/html/html.h +107 -0
  234. data/vendor/lexbor/source/lexbor/html/interface.c +165 -0
  235. data/vendor/lexbor/source/lexbor/html/interface.h +186 -0
  236. data/vendor/lexbor/source/lexbor/html/interface_res.h +4449 -0
  237. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.c +36 -0
  238. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.h +34 -0
  239. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.c +36 -0
  240. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.h +34 -0
  241. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.c +36 -0
  242. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.h +34 -0
  243. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.c +36 -0
  244. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.h +34 -0
  245. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.c +36 -0
  246. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.h +34 -0
  247. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.c +36 -0
  248. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.h +34 -0
  249. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.c +36 -0
  250. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.h +34 -0
  251. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.c +36 -0
  252. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.h +34 -0
  253. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.c +36 -0
  254. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.h +34 -0
  255. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.c +36 -0
  256. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.h +34 -0
  257. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.c +36 -0
  258. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.h +34 -0
  259. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.c +36 -0
  260. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.h +34 -0
  261. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.c +36 -0
  262. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.h +34 -0
  263. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.c +36 -0
  264. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.h +34 -0
  265. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.c +36 -0
  266. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.h +34 -0
  267. data/vendor/lexbor/source/lexbor/html/interfaces/document.c +444 -0
  268. data/vendor/lexbor/source/lexbor/html/interfaces/document.h +256 -0
  269. data/vendor/lexbor/source/lexbor/html/interfaces/element.c +64 -0
  270. data/vendor/lexbor/source/lexbor/html/interfaces/element.h +54 -0
  271. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.c +36 -0
  272. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.h +34 -0
  273. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.c +36 -0
  274. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.h +34 -0
  275. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.c +36 -0
  276. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.h +34 -0
  277. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.c +36 -0
  278. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.h +34 -0
  279. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.c +36 -0
  280. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.h +34 -0
  281. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.c +36 -0
  282. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.h +34 -0
  283. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.c +36 -0
  284. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.h +34 -0
  285. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.c +36 -0
  286. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.h +34 -0
  287. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.c +36 -0
  288. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.h +34 -0
  289. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.c +36 -0
  290. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.h +34 -0
  291. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.c +36 -0
  292. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.h +34 -0
  293. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.c +36 -0
  294. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.h +34 -0
  295. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.c +36 -0
  296. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.h +34 -0
  297. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.c +36 -0
  298. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.h +34 -0
  299. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.c +36 -0
  300. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.h +34 -0
  301. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.c +36 -0
  302. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.h +34 -0
  303. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.c +36 -0
  304. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.h +34 -0
  305. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.c +36 -0
  306. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.h +34 -0
  307. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.c +36 -0
  308. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.h +34 -0
  309. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.c +36 -0
  310. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.h +34 -0
  311. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.c +36 -0
  312. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.h +34 -0
  313. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.c +36 -0
  314. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.h +34 -0
  315. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.c +36 -0
  316. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.h +34 -0
  317. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.c +36 -0
  318. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.h +34 -0
  319. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.c +36 -0
  320. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.h +34 -0
  321. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.c +36 -0
  322. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.h +34 -0
  323. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.c +36 -0
  324. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.h +34 -0
  325. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.c +36 -0
  326. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.h +34 -0
  327. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.c +36 -0
  328. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.h +34 -0
  329. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.c +36 -0
  330. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.h +34 -0
  331. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.c +36 -0
  332. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.h +34 -0
  333. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.c +36 -0
  334. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.h +34 -0
  335. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.c +36 -0
  336. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.h +34 -0
  337. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.c +36 -0
  338. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.h +34 -0
  339. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.c +36 -0
  340. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.h +34 -0
  341. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.c +36 -0
  342. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.h +34 -0
  343. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +36 -0
  344. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.h +34 -0
  345. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.c +36 -0
  346. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.h +34 -0
  347. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.c +36 -0
  348. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.h +34 -0
  349. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.c +36 -0
  350. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.h +34 -0
  351. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.c +36 -0
  352. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.h +34 -0
  353. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.c +36 -0
  354. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.h +34 -0
  355. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.c +36 -0
  356. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.h +34 -0
  357. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.c +36 -0
  358. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.h +34 -0
  359. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.c +36 -0
  360. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.h +34 -0
  361. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.c +36 -0
  362. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.h +34 -0
  363. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.c +36 -0
  364. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.h +34 -0
  365. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.c +46 -0
  366. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.h +38 -0
  367. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.c +36 -0
  368. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.h +34 -0
  369. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.c +36 -0
  370. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.h +34 -0
  371. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.c +133 -0
  372. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.h +42 -0
  373. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.c +36 -0
  374. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.h +34 -0
  375. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.c +36 -0
  376. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.h +34 -0
  377. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.c +36 -0
  378. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.h +34 -0
  379. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.c +36 -0
  380. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.h +34 -0
  381. data/vendor/lexbor/source/lexbor/html/interfaces/window.c +36 -0
  382. data/vendor/lexbor/source/lexbor/html/interfaces/window.h +34 -0
  383. data/vendor/lexbor/source/lexbor/html/node.c +14 -0
  384. data/vendor/lexbor/source/lexbor/html/node.h +67 -0
  385. data/vendor/lexbor/source/lexbor/html/parser.c +469 -0
  386. data/vendor/lexbor/source/lexbor/html/parser.h +170 -0
  387. data/vendor/lexbor/source/lexbor/html/serialize.c +1510 -0
  388. data/vendor/lexbor/source/lexbor/html/serialize.h +93 -0
  389. data/vendor/lexbor/source/lexbor/html/tag.h +103 -0
  390. data/vendor/lexbor/source/lexbor/html/tag_res.h +2262 -0
  391. data/vendor/lexbor/source/lexbor/html/token.c +386 -0
  392. data/vendor/lexbor/source/lexbor/html/token.h +130 -0
  393. data/vendor/lexbor/source/lexbor/html/token_attr.c +44 -0
  394. data/vendor/lexbor/source/lexbor/html/token_attr.h +67 -0
  395. data/vendor/lexbor/source/lexbor/html/tokenizer/error.c +28 -0
  396. data/vendor/lexbor/source/lexbor/html/tokenizer/error.h +141 -0
  397. data/vendor/lexbor/source/lexbor/html/tokenizer/res.h +4956 -0
  398. data/vendor/lexbor/source/lexbor/html/tokenizer/state.c +2171 -0
  399. data/vendor/lexbor/source/lexbor/html/tokenizer/state.h +225 -0
  400. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.c +489 -0
  401. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.h +27 -0
  402. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.c +1654 -0
  403. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.h +27 -0
  404. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.c +303 -0
  405. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.h +32 -0
  406. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.c +311 -0
  407. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.h +32 -0
  408. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.c +1209 -0
  409. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.h +32 -0
  410. data/vendor/lexbor/source/lexbor/html/tokenizer.c +499 -0
  411. data/vendor/lexbor/source/lexbor/html/tokenizer.h +343 -0
  412. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.c +241 -0
  413. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.h +117 -0
  414. data/vendor/lexbor/source/lexbor/html/tree/error.c +26 -0
  415. data/vendor/lexbor/source/lexbor/html/tree/error.h +114 -0
  416. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_body.c +62 -0
  417. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_frameset.c +63 -0
  418. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_body.c +82 -0
  419. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_frameset.c +88 -0
  420. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_head.c +222 -0
  421. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_head.c +144 -0
  422. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_html.c +166 -0
  423. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/foreign_content.c +358 -0
  424. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1974 -0
  425. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_caption.c +158 -0
  426. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_cell.c +187 -0
  427. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_column_group.c +194 -0
  428. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_frameset.c +149 -0
  429. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head.c +374 -0
  430. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head_noscript.c +121 -0
  431. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_row.c +211 -0
  432. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select.c +341 -0
  433. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select_in_table.c +115 -0
  434. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table.c +451 -0
  435. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_body.c +208 -0
  436. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_text.c +127 -0
  437. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_template.c +189 -0
  438. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/initial.c +411 -0
  439. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/text.c +61 -0
  440. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode.h +135 -0
  441. data/vendor/lexbor/source/lexbor/html/tree/open_elements.c +251 -0
  442. data/vendor/lexbor/source/lexbor/html/tree/open_elements.h +105 -0
  443. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.c +10 -0
  444. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.h +100 -0
  445. data/vendor/lexbor/source/lexbor/html/tree.c +1726 -0
  446. data/vendor/lexbor/source/lexbor/html/tree.h +431 -0
  447. data/vendor/lexbor/source/lexbor/html/tree_res.h +111 -0
  448. data/vendor/lexbor/source/lexbor/ns/base.h +32 -0
  449. data/vendor/lexbor/source/lexbor/ns/config.cmake +2 -0
  450. data/vendor/lexbor/source/lexbor/ns/const.h +37 -0
  451. data/vendor/lexbor/source/lexbor/ns/ns.c +154 -0
  452. data/vendor/lexbor/source/lexbor/ns/ns.h +66 -0
  453. data/vendor/lexbor/source/lexbor/ns/res.h +97 -0
  454. data/vendor/lexbor/source/lexbor/ports/posix/config.cmake +11 -0
  455. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/fs.c +236 -0
  456. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +33 -0
  457. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/perf.c +158 -0
  458. data/vendor/lexbor/source/lexbor/ports/windows_nt/config.cmake +18 -0
  459. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/fs.c +239 -0
  460. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +33 -0
  461. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/perf.c +81 -0
  462. data/vendor/lexbor/source/lexbor/selectors/base.h +30 -0
  463. data/vendor/lexbor/source/lexbor/selectors/config.cmake +2 -0
  464. data/vendor/lexbor/source/lexbor/selectors/selectors.c +1591 -0
  465. data/vendor/lexbor/source/lexbor/selectors/selectors.h +71 -0
  466. data/vendor/lexbor/source/lexbor/tag/base.h +32 -0
  467. data/vendor/lexbor/source/lexbor/tag/config.cmake +2 -0
  468. data/vendor/lexbor/source/lexbor/tag/const.h +225 -0
  469. data/vendor/lexbor/source/lexbor/tag/res.h +562 -0
  470. data/vendor/lexbor/source/lexbor/tag/tag.c +144 -0
  471. data/vendor/lexbor/source/lexbor/tag/tag.h +123 -0
  472. data/vendor/lexbor/source/lexbor/utils/base.h +32 -0
  473. data/vendor/lexbor/source/lexbor/utils/config.cmake +2 -0
  474. data/vendor/lexbor/source/lexbor/utils/http.c +534 -0
  475. data/vendor/lexbor/source/lexbor/utils/http.h +90 -0
  476. data/vendor/lexbor/source/lexbor/utils/utils.h +15 -0
  477. data/vendor/lexbor/source/lexbor/utils/warc.c +817 -0
  478. data/vendor/lexbor/source/lexbor/utils/warc.h +126 -0
  479. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +231 -0
  480. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +21 -0
  481. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +26 -0
  482. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +49 -0
  483. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +54 -0
  484. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +36 -0
  485. data/vendor/lexbor/version +1 -0
  486. metadata +542 -0
@@ -0,0 +1,1051 @@
1
+ /*
2
+ * string.c : an XML string utilities module
3
+ *
4
+ * This module provides various utility functions for manipulating
5
+ * the xmlChar* type. All functions named xmlStr* have been moved here
6
+ * from the parser.c file (their original home).
7
+ *
8
+ * See Copyright for the status of this software.
9
+ *
10
+ * UTF8 string routines from:
11
+ * William Brack <wbrack@mmm.com.hk>
12
+ *
13
+ * daniel@veillard.com
14
+ */
15
+
16
+ #define IN_LIBXML
17
+ #include "libxml.h"
18
+
19
+ #include <stdlib.h>
20
+ #include <string.h>
21
+ #include <limits.h>
22
+ #include "libxml/xmlmemory.h"
23
+ #include "libxml/parserInternals.h"
24
+ #include "libxml/xmlstring.h"
25
+
26
+ #include "private/parser.h"
27
+ #include "private/string.h"
28
+
29
+ /************************************************************************
30
+ * *
31
+ * Commodity functions to handle xmlChars *
32
+ * *
33
+ ************************************************************************/
34
+
35
+ /**
36
+ * xmlStrndup:
37
+ * @cur: the input xmlChar *
38
+ * @len: the len of @cur
39
+ *
40
+ * a strndup for array of xmlChar's
41
+ *
42
+ * Returns a new xmlChar * or NULL
43
+ */
44
+ xmlChar *
45
+ xmlStrndup(const xmlChar *cur, int len) {
46
+ xmlChar *ret;
47
+
48
+ if ((cur == NULL) || (len < 0)) return(NULL);
49
+ ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
50
+ if (ret == NULL) {
51
+ xmlErrMemory(NULL, NULL);
52
+ return(NULL);
53
+ }
54
+ memcpy(ret, cur, len);
55
+ ret[len] = 0;
56
+ return(ret);
57
+ }
58
+
59
+ /**
60
+ * xmlStrdup:
61
+ * @cur: the input xmlChar *
62
+ *
63
+ * a strdup for array of xmlChar's. Since they are supposed to be
64
+ * encoded in UTF-8 or an encoding with 8bit based chars, we assume
65
+ * a termination mark of '0'.
66
+ *
67
+ * Returns a new xmlChar * or NULL
68
+ */
69
+ xmlChar *
70
+ xmlStrdup(const xmlChar *cur) {
71
+ const xmlChar *p = cur;
72
+
73
+ if (cur == NULL) return(NULL);
74
+ while (*p != 0) p++; /* non input consuming */
75
+ return(xmlStrndup(cur, p - cur));
76
+ }
77
+
78
+ /**
79
+ * xmlCharStrndup:
80
+ * @cur: the input char *
81
+ * @len: the len of @cur
82
+ *
83
+ * a strndup for char's to xmlChar's
84
+ *
85
+ * Returns a new xmlChar * or NULL
86
+ */
87
+
88
+ xmlChar *
89
+ xmlCharStrndup(const char *cur, int len) {
90
+ int i;
91
+ xmlChar *ret;
92
+
93
+ if ((cur == NULL) || (len < 0)) return(NULL);
94
+ ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
95
+ if (ret == NULL) {
96
+ xmlErrMemory(NULL, NULL);
97
+ return(NULL);
98
+ }
99
+ for (i = 0;i < len;i++) {
100
+ /* Explicit sign change */
101
+ ret[i] = (xmlChar) cur[i];
102
+ if (ret[i] == 0) return(ret);
103
+ }
104
+ ret[len] = 0;
105
+ return(ret);
106
+ }
107
+
108
+ /**
109
+ * xmlCharStrdup:
110
+ * @cur: the input char *
111
+ *
112
+ * a strdup for char's to xmlChar's
113
+ *
114
+ * Returns a new xmlChar * or NULL
115
+ */
116
+
117
+ xmlChar *
118
+ xmlCharStrdup(const char *cur) {
119
+ const char *p = cur;
120
+
121
+ if (cur == NULL) return(NULL);
122
+ while (*p != '\0') p++; /* non input consuming */
123
+ return(xmlCharStrndup(cur, p - cur));
124
+ }
125
+
126
+ /**
127
+ * xmlStrcmp:
128
+ * @str1: the first xmlChar *
129
+ * @str2: the second xmlChar *
130
+ *
131
+ * a strcmp for xmlChar's
132
+ *
133
+ * Returns the integer result of the comparison
134
+ */
135
+
136
+ int
137
+ xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
138
+ if (str1 == str2) return(0);
139
+ if (str1 == NULL) return(-1);
140
+ if (str2 == NULL) return(1);
141
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
142
+ return(strcmp((const char *)str1, (const char *)str2));
143
+ #else
144
+ do {
145
+ int tmp = *str1++ - *str2;
146
+ if (tmp != 0) return(tmp);
147
+ } while (*str2++ != 0);
148
+ return 0;
149
+ #endif
150
+ }
151
+
152
+ /**
153
+ * xmlStrEqual:
154
+ * @str1: the first xmlChar *
155
+ * @str2: the second xmlChar *
156
+ *
157
+ * Check if both strings are equal of have same content.
158
+ * Should be a bit more readable and faster than xmlStrcmp()
159
+ *
160
+ * Returns 1 if they are equal, 0 if they are different
161
+ */
162
+
163
+ int
164
+ xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
165
+ if (str1 == str2) return(1);
166
+ if (str1 == NULL) return(0);
167
+ if (str2 == NULL) return(0);
168
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
169
+ return(strcmp((const char *)str1, (const char *)str2) == 0);
170
+ #else
171
+ do {
172
+ if (*str1++ != *str2) return(0);
173
+ } while (*str2++);
174
+ return(1);
175
+ #endif
176
+ }
177
+
178
+ /**
179
+ * xmlStrQEqual:
180
+ * @pref: the prefix of the QName
181
+ * @name: the localname of the QName
182
+ * @str: the second xmlChar *
183
+ *
184
+ * Check if a QName is Equal to a given string
185
+ *
186
+ * Returns 1 if they are equal, 0 if they are different
187
+ */
188
+
189
+ int
190
+ xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
191
+ if (pref == NULL) return(xmlStrEqual(name, str));
192
+ if (name == NULL) return(0);
193
+ if (str == NULL) return(0);
194
+
195
+ do {
196
+ if (*pref++ != *str) return(0);
197
+ } while ((*str++) && (*pref));
198
+ if (*str++ != ':') return(0);
199
+ do {
200
+ if (*name++ != *str) return(0);
201
+ } while (*str++);
202
+ return(1);
203
+ }
204
+
205
+ /**
206
+ * xmlStrncmp:
207
+ * @str1: the first xmlChar *
208
+ * @str2: the second xmlChar *
209
+ * @len: the max comparison length
210
+ *
211
+ * a strncmp for xmlChar's
212
+ *
213
+ * Returns the integer result of the comparison
214
+ */
215
+
216
+ int
217
+ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
218
+ if (len <= 0) return(0);
219
+ if (str1 == str2) return(0);
220
+ if (str1 == NULL) return(-1);
221
+ if (str2 == NULL) return(1);
222
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
223
+ return(strncmp((const char *)str1, (const char *)str2, len));
224
+ #else
225
+ do {
226
+ int tmp = *str1++ - *str2;
227
+ if (tmp != 0 || --len == 0) return(tmp);
228
+ } while (*str2++ != 0);
229
+ return 0;
230
+ #endif
231
+ }
232
+
233
+ static const xmlChar casemap[256] = {
234
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
235
+ 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
236
+ 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
237
+ 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
238
+ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
239
+ 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
240
+ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
241
+ 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
242
+ 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
243
+ 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
244
+ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
245
+ 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
246
+ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
247
+ 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
248
+ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
249
+ 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
250
+ 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
251
+ 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
252
+ 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
253
+ 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
254
+ 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
255
+ 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
256
+ 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
257
+ 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
258
+ 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
259
+ 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
260
+ 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
261
+ 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
262
+ 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
263
+ 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
264
+ 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
265
+ 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
266
+ };
267
+
268
+ /**
269
+ * xmlStrcasecmp:
270
+ * @str1: the first xmlChar *
271
+ * @str2: the second xmlChar *
272
+ *
273
+ * a strcasecmp for xmlChar's
274
+ *
275
+ * Returns the integer result of the comparison
276
+ */
277
+
278
+ int
279
+ xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
280
+ register int tmp;
281
+
282
+ if (str1 == str2) return(0);
283
+ if (str1 == NULL) return(-1);
284
+ if (str2 == NULL) return(1);
285
+ do {
286
+ tmp = casemap[*str1++] - casemap[*str2];
287
+ if (tmp != 0) return(tmp);
288
+ } while (*str2++ != 0);
289
+ return 0;
290
+ }
291
+
292
+ /**
293
+ * xmlStrncasecmp:
294
+ * @str1: the first xmlChar *
295
+ * @str2: the second xmlChar *
296
+ * @len: the max comparison length
297
+ *
298
+ * a strncasecmp for xmlChar's
299
+ *
300
+ * Returns the integer result of the comparison
301
+ */
302
+
303
+ int
304
+ xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
305
+ register int tmp;
306
+
307
+ if (len <= 0) return(0);
308
+ if (str1 == str2) return(0);
309
+ if (str1 == NULL) return(-1);
310
+ if (str2 == NULL) return(1);
311
+ do {
312
+ tmp = casemap[*str1++] - casemap[*str2];
313
+ if (tmp != 0 || --len == 0) return(tmp);
314
+ } while (*str2++ != 0);
315
+ return 0;
316
+ }
317
+
318
+ /**
319
+ * xmlStrchr:
320
+ * @str: the xmlChar * array
321
+ * @val: the xmlChar to search
322
+ *
323
+ * a strchr for xmlChar's
324
+ *
325
+ * Returns the xmlChar * for the first occurrence or NULL.
326
+ */
327
+
328
+ const xmlChar *
329
+ xmlStrchr(const xmlChar *str, xmlChar val) {
330
+ if (str == NULL) return(NULL);
331
+ while (*str != 0) { /* non input consuming */
332
+ if (*str == val) return((xmlChar *) str);
333
+ str++;
334
+ }
335
+ return(NULL);
336
+ }
337
+
338
+ /**
339
+ * xmlStrstr:
340
+ * @str: the xmlChar * array (haystack)
341
+ * @val: the xmlChar to search (needle)
342
+ *
343
+ * a strstr for xmlChar's
344
+ *
345
+ * Returns the xmlChar * for the first occurrence or NULL.
346
+ */
347
+
348
+ const xmlChar *
349
+ xmlStrstr(const xmlChar *str, const xmlChar *val) {
350
+ int n;
351
+
352
+ if (str == NULL) return(NULL);
353
+ if (val == NULL) return(NULL);
354
+ n = xmlStrlen(val);
355
+
356
+ if (n == 0) return(str);
357
+ while (*str != 0) { /* non input consuming */
358
+ if (*str == *val) {
359
+ if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
360
+ }
361
+ str++;
362
+ }
363
+ return(NULL);
364
+ }
365
+
366
+ /**
367
+ * xmlStrcasestr:
368
+ * @str: the xmlChar * array (haystack)
369
+ * @val: the xmlChar to search (needle)
370
+ *
371
+ * a case-ignoring strstr for xmlChar's
372
+ *
373
+ * Returns the xmlChar * for the first occurrence or NULL.
374
+ */
375
+
376
+ const xmlChar *
377
+ xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
378
+ int n;
379
+
380
+ if (str == NULL) return(NULL);
381
+ if (val == NULL) return(NULL);
382
+ n = xmlStrlen(val);
383
+
384
+ if (n == 0) return(str);
385
+ while (*str != 0) { /* non input consuming */
386
+ if (casemap[*str] == casemap[*val])
387
+ if (!xmlStrncasecmp(str, val, n)) return(str);
388
+ str++;
389
+ }
390
+ return(NULL);
391
+ }
392
+
393
+ /**
394
+ * xmlStrsub:
395
+ * @str: the xmlChar * array (haystack)
396
+ * @start: the index of the first char (zero based)
397
+ * @len: the length of the substring
398
+ *
399
+ * Extract a substring of a given string
400
+ *
401
+ * Returns the xmlChar * for the first occurrence or NULL.
402
+ */
403
+
404
+ xmlChar *
405
+ xmlStrsub(const xmlChar *str, int start, int len) {
406
+ int i;
407
+
408
+ if (str == NULL) return(NULL);
409
+ if (start < 0) return(NULL);
410
+ if (len < 0) return(NULL);
411
+
412
+ for (i = 0;i < start;i++) {
413
+ if (*str == 0) return(NULL);
414
+ str++;
415
+ }
416
+ if (*str == 0) return(NULL);
417
+ return(xmlStrndup(str, len));
418
+ }
419
+
420
+ /**
421
+ * xmlStrlen:
422
+ * @str: the xmlChar * array
423
+ *
424
+ * length of a xmlChar's string
425
+ *
426
+ * Returns the number of xmlChar contained in the ARRAY.
427
+ */
428
+
429
+ int
430
+ xmlStrlen(const xmlChar *str) {
431
+ size_t len = str ? strlen((const char *)str) : 0;
432
+ return(len > INT_MAX ? 0 : len);
433
+ }
434
+
435
+ /**
436
+ * xmlStrncat:
437
+ * @cur: the original xmlChar * array
438
+ * @add: the xmlChar * array added
439
+ * @len: the length of @add
440
+ *
441
+ * a strncat for array of xmlChar's, it will extend @cur with the len
442
+ * first bytes of @add. Note that if @len < 0 then this is an API error
443
+ * and NULL will be returned.
444
+ *
445
+ * Returns a new xmlChar *, the original @cur is reallocated and should
446
+ * not be freed.
447
+ */
448
+
449
+ xmlChar *
450
+ xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
451
+ int size;
452
+ xmlChar *ret;
453
+
454
+ if ((add == NULL) || (len == 0))
455
+ return(cur);
456
+ if (len < 0)
457
+ return(NULL);
458
+ if (cur == NULL)
459
+ return(xmlStrndup(add, len));
460
+
461
+ size = xmlStrlen(cur);
462
+ if ((size < 0) || (size > INT_MAX - len))
463
+ return(NULL);
464
+ ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);
465
+ if (ret == NULL) {
466
+ xmlErrMemory(NULL, NULL);
467
+ return(cur);
468
+ }
469
+ memcpy(&ret[size], add, len);
470
+ ret[size + len] = 0;
471
+ return(ret);
472
+ }
473
+
474
+ /**
475
+ * xmlStrncatNew:
476
+ * @str1: first xmlChar string
477
+ * @str2: second xmlChar string
478
+ * @len: the len of @str2 or < 0
479
+ *
480
+ * same as xmlStrncat, but creates a new string. The original
481
+ * two strings are not freed. If @len is < 0 then the length
482
+ * will be calculated automatically.
483
+ *
484
+ * Returns a new xmlChar * or NULL
485
+ */
486
+ xmlChar *
487
+ xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
488
+ int size;
489
+ xmlChar *ret;
490
+
491
+ if (len < 0) {
492
+ len = xmlStrlen(str2);
493
+ if (len < 0)
494
+ return(NULL);
495
+ }
496
+ if ((str2 == NULL) || (len == 0))
497
+ return(xmlStrdup(str1));
498
+ if (str1 == NULL)
499
+ return(xmlStrndup(str2, len));
500
+
501
+ size = xmlStrlen(str1);
502
+ if ((size < 0) || (size > INT_MAX - len))
503
+ return(NULL);
504
+ ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);
505
+ if (ret == NULL) {
506
+ xmlErrMemory(NULL, NULL);
507
+ return(xmlStrndup(str1, size));
508
+ }
509
+ memcpy(ret, str1, size);
510
+ memcpy(&ret[size], str2, len);
511
+ ret[size + len] = 0;
512
+ return(ret);
513
+ }
514
+
515
+ /**
516
+ * xmlStrcat:
517
+ * @cur: the original xmlChar * array
518
+ * @add: the xmlChar * array added
519
+ *
520
+ * a strcat for array of xmlChar's. Since they are supposed to be
521
+ * encoded in UTF-8 or an encoding with 8bit based chars, we assume
522
+ * a termination mark of '0'.
523
+ *
524
+ * Returns a new xmlChar * containing the concatenated string. The original
525
+ * @cur is reallocated and should not be freed.
526
+ */
527
+ xmlChar *
528
+ xmlStrcat(xmlChar *cur, const xmlChar *add) {
529
+ const xmlChar *p = add;
530
+
531
+ if (add == NULL) return(cur);
532
+ if (cur == NULL)
533
+ return(xmlStrdup(add));
534
+
535
+ while (*p != 0) p++; /* non input consuming */
536
+ return(xmlStrncat(cur, add, p - add));
537
+ }
538
+
539
+ /**
540
+ * xmlStrPrintf:
541
+ * @buf: the result buffer.
542
+ * @len: the result buffer length.
543
+ * @msg: the message with printf formatting.
544
+ * @...: extra parameters for the message.
545
+ *
546
+ * Formats @msg and places result into @buf.
547
+ *
548
+ * Returns the number of characters written to @buf or -1 if an error occurs.
549
+ */
550
+ int XMLCDECL
551
+ xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
552
+ va_list args;
553
+ int ret;
554
+
555
+ if((buf == NULL) || (msg == NULL)) {
556
+ return(-1);
557
+ }
558
+
559
+ va_start(args, msg);
560
+ ret = vsnprintf((char *) buf, len, (const char *) msg, args);
561
+ va_end(args);
562
+ buf[len - 1] = 0; /* be safe ! */
563
+
564
+ return(ret);
565
+ }
566
+
567
+ /**
568
+ * xmlStrVPrintf:
569
+ * @buf: the result buffer.
570
+ * @len: the result buffer length.
571
+ * @msg: the message with printf formatting.
572
+ * @ap: extra parameters for the message.
573
+ *
574
+ * Formats @msg and places result into @buf.
575
+ *
576
+ * Returns the number of characters written to @buf or -1 if an error occurs.
577
+ */
578
+ int
579
+ xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
580
+ int ret;
581
+
582
+ if((buf == NULL) || (msg == NULL)) {
583
+ return(-1);
584
+ }
585
+
586
+ ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
587
+ buf[len - 1] = 0; /* be safe ! */
588
+
589
+ return(ret);
590
+ }
591
+
592
+ /************************************************************************
593
+ * *
594
+ * Generic UTF8 handling routines *
595
+ * *
596
+ * From rfc2044: encoding of the Unicode values on UTF-8: *
597
+ * *
598
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
599
+ * 0000 0000-0000 007F 0xxxxxxx *
600
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
601
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
602
+ * *
603
+ * I hope we won't use values > 0xFFFF anytime soon ! *
604
+ * *
605
+ ************************************************************************/
606
+
607
+
608
+ /**
609
+ * xmlUTF8Size:
610
+ * @utf: pointer to the UTF8 character
611
+ *
612
+ * calculates the internal size of a UTF8 character
613
+ *
614
+ * returns the numbers of bytes in the character, -1 on format error
615
+ */
616
+ int
617
+ xmlUTF8Size(const xmlChar *utf) {
618
+ xmlChar mask;
619
+ int len;
620
+
621
+ if (utf == NULL)
622
+ return -1;
623
+ if (*utf < 0x80)
624
+ return 1;
625
+ /* check valid UTF8 character */
626
+ if (!(*utf & 0x40))
627
+ return -1;
628
+ /* determine number of bytes in char */
629
+ len = 2;
630
+ for (mask=0x20; mask != 0; mask>>=1) {
631
+ if (!(*utf & mask))
632
+ return len;
633
+ len++;
634
+ }
635
+ return -1;
636
+ }
637
+
638
+ /**
639
+ * xmlUTF8Charcmp:
640
+ * @utf1: pointer to first UTF8 char
641
+ * @utf2: pointer to second UTF8 char
642
+ *
643
+ * compares the two UCS4 values
644
+ *
645
+ * returns result of the compare as with xmlStrncmp
646
+ */
647
+ int
648
+ xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
649
+
650
+ if (utf1 == NULL ) {
651
+ if (utf2 == NULL)
652
+ return 0;
653
+ return -1;
654
+ }
655
+ return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
656
+ }
657
+
658
+ /**
659
+ * xmlUTF8Strlen:
660
+ * @utf: a sequence of UTF-8 encoded bytes
661
+ *
662
+ * compute the length of an UTF8 string, it doesn't do a full UTF8
663
+ * checking of the content of the string.
664
+ *
665
+ * Returns the number of characters in the string or -1 in case of error
666
+ */
667
+ int
668
+ xmlUTF8Strlen(const xmlChar *utf) {
669
+ size_t ret = 0;
670
+
671
+ if (utf == NULL)
672
+ return(-1);
673
+
674
+ while (*utf != 0) {
675
+ if (utf[0] & 0x80) {
676
+ if ((utf[1] & 0xc0) != 0x80)
677
+ return(-1);
678
+ if ((utf[0] & 0xe0) == 0xe0) {
679
+ if ((utf[2] & 0xc0) != 0x80)
680
+ return(-1);
681
+ if ((utf[0] & 0xf0) == 0xf0) {
682
+ if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
683
+ return(-1);
684
+ utf += 4;
685
+ } else {
686
+ utf += 3;
687
+ }
688
+ } else {
689
+ utf += 2;
690
+ }
691
+ } else {
692
+ utf++;
693
+ }
694
+ ret++;
695
+ }
696
+ return(ret > INT_MAX ? 0 : ret);
697
+ }
698
+
699
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence. This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Reads the first UTF8 character from @utf. The structure of the
 * sequence is checked (lead byte announcing 1 to 4 bytes, continuation
 * bytes of the form 10xxxxxx); overlong encodings and the 0x10FFFF
 * limit are not checked.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 * the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /*
         * A byte of the form 10xxxxxx can only continue a sequence, it
         * can never start one. xmlCheckUTF8() rejects it as well.
         */
        if ((c & 0xc0) != 0xc0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                /* lead bytes announcing more than 4 bytes are invalid */
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
768
+
769
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks whether the zero-terminated string @utf is structurally valid
 * UTF-8. The check is not strict: sequences longer than necessary are
 * accepted, and while sequences are limited to 4 bytes the 0x10ffff
 * maximum value is not enforced.
 *
 * Return value: 1 if @utf is valid, 0 if it is not or if @utf is NULL.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *cur;

    if (utf == NULL)
        return(0);

    /*
     * Accepted sequences (in "bit format"):
     *    0xxxxxxx                                      1-byte
     *    110xxxxx 10xxxxxx                             2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           4-byte
     */
    cur = utf;
    while (*cur != 0) {
        const unsigned char lead = *cur;
        int extra;      /* continuation bytes announced by the lead byte */
        int k;

        if ((lead & 0x80) == 0x00)
            extra = 0;  /* starts with 0 */
        else if ((lead & 0xe0) == 0xc0)
            extra = 1;  /* starts with 110 */
        else if ((lead & 0xf0) == 0xe0)
            extra = 2;  /* starts with 1110 */
        else if ((lead & 0xf8) == 0xf0)
            extra = 3;  /* starts with 11110 */
        else
            return(0);  /* unknown encoding */

        /* stop at the first byte that is not 10xxxxxx */
        for (k = 1; k <= extra; k++) {
            if ((cur[k] & 0xc0) != 0x80)
                return(0);
        }
        cur += extra + 1;
    }
    return(1);
}
823
+
824
+ /**
825
+ * xmlUTF8Strsize:
826
+ * @utf: a sequence of UTF-8 encoded bytes
827
+ * @len: the number of characters in the array
828
+ *
829
+ * storage size of an UTF8 string
830
+ * the behaviour is not guaranteed if the input string is not UTF-8
831
+ *
832
+ * Returns the storage size of
833
+ * the first 'len' characters of ARRAY
834
+ */
835
+
836
+ int
837
+ xmlUTF8Strsize(const xmlChar *utf, int len) {
838
+ const xmlChar *ptr=utf;
839
+ int ch;
840
+ size_t ret;
841
+
842
+ if (utf == NULL)
843
+ return(0);
844
+
845
+ if (len <= 0)
846
+ return(0);
847
+
848
+ while ( len-- > 0) {
849
+ if ( !*ptr )
850
+ break;
851
+ if ( (ch = *ptr++) & 0x80)
852
+ while ((ch<<=1) & 0x80 ) {
853
+ if (*ptr == 0) break;
854
+ ptr++;
855
+ }
856
+ }
857
+ ret = ptr - utf;
858
+ return (ret > INT_MAX ? 0 : ret);
859
+ }
860
+
861
+
862
+ /**
863
+ * xmlUTF8Strndup:
864
+ * @utf: the input UTF8 *
865
+ * @len: the len of @utf (in chars)
866
+ *
867
+ * a strndup for array of UTF8's
868
+ *
869
+ * Returns a new UTF8 * or NULL
870
+ */
871
+ xmlChar *
872
+ xmlUTF8Strndup(const xmlChar *utf, int len) {
873
+ xmlChar *ret;
874
+ int i;
875
+
876
+ if ((utf == NULL) || (len < 0)) return(NULL);
877
+ i = xmlUTF8Strsize(utf, len);
878
+ ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);
879
+ if (ret == NULL) {
880
+ return(NULL);
881
+ }
882
+ memcpy(ret, utf, i);
883
+ ret[i] = 0;
884
+ return(ret);
885
+ }
886
+
887
+ /**
888
+ * xmlUTF8Strpos:
889
+ * @utf: the input UTF8 *
890
+ * @pos: the position of the desired UTF8 char (in chars)
891
+ *
892
+ * a function to provide the equivalent of fetching a
893
+ * character from a string array
894
+ *
895
+ * Returns a pointer to the UTF8 character or NULL
896
+ */
897
+ const xmlChar *
898
+ xmlUTF8Strpos(const xmlChar *utf, int pos) {
899
+ int ch;
900
+
901
+ if (utf == NULL) return(NULL);
902
+ if (pos < 0)
903
+ return(NULL);
904
+ while (pos--) {
905
+ if ((ch=*utf++) == 0) return(NULL);
906
+ if ( ch & 0x80 ) {
907
+ /* if not simple ascii, verify proper format */
908
+ if ( (ch & 0xc0) != 0xc0 )
909
+ return(NULL);
910
+ /* then skip over remaining bytes for this char */
911
+ while ( (ch <<= 1) & 0x80 )
912
+ if ( (*utf++ & 0xc0) != 0x80 )
913
+ return(NULL);
914
+ }
915
+ }
916
+ return((xmlChar *)utf);
917
+ }
918
+
919
+ /**
920
+ * xmlUTF8Strloc:
921
+ * @utf: the input UTF8 *
922
+ * @utfchar: the UTF8 character to be found
923
+ *
924
+ * a function to provide the relative location of a UTF8 char
925
+ *
926
+ * Returns the relative character position of the desired char
927
+ * or -1 if not found
928
+ */
929
+ int
930
+ xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
931
+ size_t i;
932
+ int size;
933
+ int ch;
934
+
935
+ if (utf==NULL || utfchar==NULL) return -1;
936
+ size = xmlUTF8Strsize(utfchar, 1);
937
+ for(i=0; (ch=*utf) != 0; i++) {
938
+ if (xmlStrncmp(utf, utfchar, size)==0)
939
+ return(i > INT_MAX ? 0 : i);
940
+ utf++;
941
+ if ( ch & 0x80 ) {
942
+ /* if not simple ascii, verify proper format */
943
+ if ( (ch & 0xc0) != 0xc0 )
944
+ return(-1);
945
+ /* then skip over remaining bytes for this char */
946
+ while ( (ch <<= 1) & 0x80 )
947
+ if ( (*utf++ & 0xc0) != 0x80 )
948
+ return(-1);
949
+ }
950
+ }
951
+
952
+ return(-1);
953
+ }
954
+ /**
955
+ * xmlUTF8Strsub:
956
+ * @utf: a sequence of UTF-8 encoded bytes
957
+ * @start: relative pos of first char
958
+ * @len: total number to copy
959
+ *
960
+ * Create a substring from a given UTF-8 string
961
+ * Note: positions are given in units of UTF-8 chars
962
+ *
963
+ * Returns a pointer to a newly created string
964
+ * or NULL if any problem
965
+ */
966
+
967
+ xmlChar *
968
+ xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
969
+ int i;
970
+ int ch;
971
+
972
+ if (utf == NULL) return(NULL);
973
+ if (start < 0) return(NULL);
974
+ if (len < 0) return(NULL);
975
+
976
+ /*
977
+ * Skip over any leading chars
978
+ */
979
+ for (i = 0;i < start;i++) {
980
+ if ((ch=*utf++) == 0) return(NULL);
981
+ if ( ch & 0x80 ) {
982
+ /* if not simple ascii, verify proper format */
983
+ if ( (ch & 0xc0) != 0xc0 )
984
+ return(NULL);
985
+ /* then skip over remaining bytes for this char */
986
+ while ( (ch <<= 1) & 0x80 )
987
+ if ( (*utf++ & 0xc0) != 0x80 )
988
+ return(NULL);
989
+ }
990
+ }
991
+
992
+ return(xmlUTF8Strndup(utf, len));
993
+ }
994
+
995
+ /**
996
+ * xmlEscapeFormatString:
997
+ * @msg: a pointer to the string in which to escape '%' characters.
998
+ * Must be a heap-allocated buffer created by libxml2 that may be
999
+ * returned, or that may be freed and replaced.
1000
+ *
1001
+ * Replaces the string pointed to by 'msg' with an escaped string.
1002
+ * Returns the same string with all '%' characters escaped.
1003
+ */
1004
+ xmlChar *
1005
+ xmlEscapeFormatString(xmlChar **msg)
1006
+ {
1007
+ xmlChar *msgPtr = NULL;
1008
+ xmlChar *result = NULL;
1009
+ xmlChar *resultPtr = NULL;
1010
+ size_t count = 0;
1011
+ size_t msgLen = 0;
1012
+ size_t resultLen = 0;
1013
+
1014
+ if (!msg || !*msg)
1015
+ return(NULL);
1016
+
1017
+ for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
1018
+ ++msgLen;
1019
+ if (*msgPtr == '%')
1020
+ ++count;
1021
+ }
1022
+
1023
+ if (count == 0)
1024
+ return(*msg);
1025
+
1026
+ if ((count > INT_MAX) || (msgLen > INT_MAX - count))
1027
+ return(NULL);
1028
+ resultLen = msgLen + count + 1;
1029
+ result = (xmlChar *) xmlMallocAtomic(resultLen);
1030
+ if (result == NULL) {
1031
+ /* Clear *msg to prevent format string vulnerabilities in
1032
+ out-of-memory situations. */
1033
+ xmlFree(*msg);
1034
+ *msg = NULL;
1035
+ xmlErrMemory(NULL, NULL);
1036
+ return(NULL);
1037
+ }
1038
+
1039
+ for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1040
+ *resultPtr = *msgPtr;
1041
+ if (*msgPtr == '%')
1042
+ *(++resultPtr) = '%';
1043
+ }
1044
+ result[resultLen - 1] = '\0';
1045
+
1046
+ xmlFree(*msg);
1047
+ *msg = result;
1048
+
1049
+ return *msg;
1050
+ }
1051
+