nokolexbor 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486)
  1. checksums.yaml +7 -0
  2. data/ext/nokolexbor/config.h +186 -0
  3. data/ext/nokolexbor/extconf.rb +131 -0
  4. data/ext/nokolexbor/libxml/HTMLparser.h +320 -0
  5. data/ext/nokolexbor/libxml/SAX2.h +173 -0
  6. data/ext/nokolexbor/libxml/chvalid.h +230 -0
  7. data/ext/nokolexbor/libxml/debugXML.h +217 -0
  8. data/ext/nokolexbor/libxml/dict.h +81 -0
  9. data/ext/nokolexbor/libxml/encoding.h +232 -0
  10. data/ext/nokolexbor/libxml/entities.h +153 -0
  11. data/ext/nokolexbor/libxml/globals.h +529 -0
  12. data/ext/nokolexbor/libxml/hash.h +236 -0
  13. data/ext/nokolexbor/libxml/list.h +137 -0
  14. data/ext/nokolexbor/libxml/parser.h +1264 -0
  15. data/ext/nokolexbor/libxml/parserInternals.h +641 -0
  16. data/ext/nokolexbor/libxml/pattern.h +100 -0
  17. data/ext/nokolexbor/libxml/threads.h +94 -0
  18. data/ext/nokolexbor/libxml/tree.h +1315 -0
  19. data/ext/nokolexbor/libxml/uri.h +94 -0
  20. data/ext/nokolexbor/libxml/valid.h +448 -0
  21. data/ext/nokolexbor/libxml/xmlIO.h +369 -0
  22. data/ext/nokolexbor/libxml/xmlautomata.h +146 -0
  23. data/ext/nokolexbor/libxml/xmlerror.h +919 -0
  24. data/ext/nokolexbor/libxml/xmlexports.h +79 -0
  25. data/ext/nokolexbor/libxml/xmlmemory.h +226 -0
  26. data/ext/nokolexbor/libxml/xmlregexp.h +222 -0
  27. data/ext/nokolexbor/libxml/xmlstring.h +140 -0
  28. data/ext/nokolexbor/libxml/xmlversion.h +526 -0
  29. data/ext/nokolexbor/libxml/xpath.h +575 -0
  30. data/ext/nokolexbor/libxml/xpathInternals.h +632 -0
  31. data/ext/nokolexbor/libxml/xpointer.h +137 -0
  32. data/ext/nokolexbor/libxml.h +76 -0
  33. data/ext/nokolexbor/memory.c +39 -0
  34. data/ext/nokolexbor/nl_document.c +51 -0
  35. data/ext/nokolexbor/nl_node.c +790 -0
  36. data/ext/nokolexbor/nl_node_set.c +368 -0
  37. data/ext/nokolexbor/nl_xpath_context.c +200 -0
  38. data/ext/nokolexbor/nokolexbor.c +63 -0
  39. data/ext/nokolexbor/nokolexbor.h +37 -0
  40. data/ext/nokolexbor/private/buf.h +70 -0
  41. data/ext/nokolexbor/private/dict.h +11 -0
  42. data/ext/nokolexbor/private/enc.h +17 -0
  43. data/ext/nokolexbor/private/error.h +21 -0
  44. data/ext/nokolexbor/private/globals.h +9 -0
  45. data/ext/nokolexbor/private/memory.h +9 -0
  46. data/ext/nokolexbor/private/parser.h +27 -0
  47. data/ext/nokolexbor/private/string.h +9 -0
  48. data/ext/nokolexbor/private/threads.h +50 -0
  49. data/ext/nokolexbor/private/tree.h +18 -0
  50. data/ext/nokolexbor/private/xpath.h +7 -0
  51. data/ext/nokolexbor/timsort.h +601 -0
  52. data/ext/nokolexbor/xml_SAX2.c +80 -0
  53. data/ext/nokolexbor/xml_buf.c +363 -0
  54. data/ext/nokolexbor/xml_chvalid.c +334 -0
  55. data/ext/nokolexbor/xml_dict.c +1264 -0
  56. data/ext/nokolexbor/xml_encoding.c +124 -0
  57. data/ext/nokolexbor/xml_error.c +134 -0
  58. data/ext/nokolexbor/xml_globals.c +1085 -0
  59. data/ext/nokolexbor/xml_hash.c +1141 -0
  60. data/ext/nokolexbor/xml_memory.c +203 -0
  61. data/ext/nokolexbor/xml_parser.c +127 -0
  62. data/ext/nokolexbor/xml_parserInternals.c +338 -0
  63. data/ext/nokolexbor/xml_pattern.c +2375 -0
  64. data/ext/nokolexbor/xml_string.c +1051 -0
  65. data/ext/nokolexbor/xml_threads.c +881 -0
  66. data/ext/nokolexbor/xml_tree.c +148 -0
  67. data/ext/nokolexbor/xml_xpath.c +14743 -0
  68. data/lib/nokolexbor/attribute.rb +18 -0
  69. data/lib/nokolexbor/document.rb +6 -0
  70. data/lib/nokolexbor/node.rb +264 -0
  71. data/lib/nokolexbor/node_set.rb +124 -0
  72. data/lib/nokolexbor/version.rb +5 -0
  73. data/lib/nokolexbor/xpath_context.rb +14 -0
  74. data/lib/nokolexbor.rb +17 -0
  75. data/patches/0001-lexbor-support-text-pseudo-element.patch +137 -0
  76. data/patches/0002-lexbor-match-id-class-case-sensitive.patch +22 -0
  77. data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
  78. data/vendor/lexbor/CMakeLists.txt +331 -0
  79. data/vendor/lexbor/config.cmake +890 -0
  80. data/vendor/lexbor/feature.cmake +134 -0
  81. data/vendor/lexbor/source/lexbor/core/array.c +208 -0
  82. data/vendor/lexbor/source/lexbor/core/array.h +100 -0
  83. data/vendor/lexbor/source/lexbor/core/array_obj.c +216 -0
  84. data/vendor/lexbor/source/lexbor/core/array_obj.h +134 -0
  85. data/vendor/lexbor/source/lexbor/core/avl.c +442 -0
  86. data/vendor/lexbor/source/lexbor/core/avl.h +82 -0
  87. data/vendor/lexbor/source/lexbor/core/base.h +86 -0
  88. data/vendor/lexbor/source/lexbor/core/bst.c +468 -0
  89. data/vendor/lexbor/source/lexbor/core/bst.h +108 -0
  90. data/vendor/lexbor/source/lexbor/core/bst_map.c +238 -0
  91. data/vendor/lexbor/source/lexbor/core/bst_map.h +87 -0
  92. data/vendor/lexbor/source/lexbor/core/config.cmake +12 -0
  93. data/vendor/lexbor/source/lexbor/core/conv.c +203 -0
  94. data/vendor/lexbor/source/lexbor/core/conv.h +53 -0
  95. data/vendor/lexbor/source/lexbor/core/core.h +35 -0
  96. data/vendor/lexbor/source/lexbor/core/def.h +57 -0
  97. data/vendor/lexbor/source/lexbor/core/diyfp.c +153 -0
  98. data/vendor/lexbor/source/lexbor/core/diyfp.h +258 -0
  99. data/vendor/lexbor/source/lexbor/core/dobject.c +187 -0
  100. data/vendor/lexbor/source/lexbor/core/dobject.h +92 -0
  101. data/vendor/lexbor/source/lexbor/core/dtoa.c +404 -0
  102. data/vendor/lexbor/source/lexbor/core/dtoa.h +28 -0
  103. data/vendor/lexbor/source/lexbor/core/fs.h +60 -0
  104. data/vendor/lexbor/source/lexbor/core/hash.c +476 -0
  105. data/vendor/lexbor/source/lexbor/core/hash.h +218 -0
  106. data/vendor/lexbor/source/lexbor/core/in.c +267 -0
  107. data/vendor/lexbor/source/lexbor/core/in.h +172 -0
  108. data/vendor/lexbor/source/lexbor/core/lexbor.h +35 -0
  109. data/vendor/lexbor/source/lexbor/core/mem.c +228 -0
  110. data/vendor/lexbor/source/lexbor/core/mem.h +141 -0
  111. data/vendor/lexbor/source/lexbor/core/mraw.c +428 -0
  112. data/vendor/lexbor/source/lexbor/core/mraw.h +114 -0
  113. data/vendor/lexbor/source/lexbor/core/perf.h +45 -0
  114. data/vendor/lexbor/source/lexbor/core/plog.c +73 -0
  115. data/vendor/lexbor/source/lexbor/core/plog.h +102 -0
  116. data/vendor/lexbor/source/lexbor/core/print.c +168 -0
  117. data/vendor/lexbor/source/lexbor/core/print.h +39 -0
  118. data/vendor/lexbor/source/lexbor/core/sbst.h +59 -0
  119. data/vendor/lexbor/source/lexbor/core/serialize.c +27 -0
  120. data/vendor/lexbor/source/lexbor/core/serialize.h +32 -0
  121. data/vendor/lexbor/source/lexbor/core/shs.c +118 -0
  122. data/vendor/lexbor/source/lexbor/core/shs.h +82 -0
  123. data/vendor/lexbor/source/lexbor/core/str.c +617 -0
  124. data/vendor/lexbor/source/lexbor/core/str.h +247 -0
  125. data/vendor/lexbor/source/lexbor/core/str_res.h +369 -0
  126. data/vendor/lexbor/source/lexbor/core/strtod.c +326 -0
  127. data/vendor/lexbor/source/lexbor/core/strtod.h +28 -0
  128. data/vendor/lexbor/source/lexbor/core/types.h +39 -0
  129. data/vendor/lexbor/source/lexbor/core/utils.c +43 -0
  130. data/vendor/lexbor/source/lexbor/core/utils.h +36 -0
  131. data/vendor/lexbor/source/lexbor/css/base.h +44 -0
  132. data/vendor/lexbor/source/lexbor/css/config.cmake +2 -0
  133. data/vendor/lexbor/source/lexbor/css/css.h +25 -0
  134. data/vendor/lexbor/source/lexbor/css/log.c +336 -0
  135. data/vendor/lexbor/source/lexbor/css/log.h +103 -0
  136. data/vendor/lexbor/source/lexbor/css/node.h +29 -0
  137. data/vendor/lexbor/source/lexbor/css/parser.c +473 -0
  138. data/vendor/lexbor/source/lexbor/css/parser.h +368 -0
  139. data/vendor/lexbor/source/lexbor/css/selectors/base.h +48 -0
  140. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.c +91 -0
  141. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.h +66 -0
  142. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_const.h +109 -0
  143. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_res.h +302 -0
  144. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +279 -0
  145. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.h +85 -0
  146. data/vendor/lexbor/source/lexbor/css/selectors/selector.c +927 -0
  147. data/vendor/lexbor/source/lexbor/css/selectors/selector.h +200 -0
  148. data/vendor/lexbor/source/lexbor/css/selectors/selectors.c +340 -0
  149. data/vendor/lexbor/source/lexbor/css/selectors/selectors.h +137 -0
  150. data/vendor/lexbor/source/lexbor/css/selectors/state.c +1718 -0
  151. data/vendor/lexbor/source/lexbor/css/selectors/state.h +79 -0
  152. data/vendor/lexbor/source/lexbor/css/stylesheet.h +37 -0
  153. data/vendor/lexbor/source/lexbor/css/syntax/anb.c +443 -0
  154. data/vendor/lexbor/source/lexbor/css/syntax/anb.h +45 -0
  155. data/vendor/lexbor/source/lexbor/css/syntax/base.h +33 -0
  156. data/vendor/lexbor/source/lexbor/css/syntax/parser.c +9 -0
  157. data/vendor/lexbor/source/lexbor/css/syntax/parser.h +25 -0
  158. data/vendor/lexbor/source/lexbor/css/syntax/res.h +48 -0
  159. data/vendor/lexbor/source/lexbor/css/syntax/state.c +2603 -0
  160. data/vendor/lexbor/source/lexbor/css/syntax/state.h +140 -0
  161. data/vendor/lexbor/source/lexbor/css/syntax/state_res.h +273 -0
  162. data/vendor/lexbor/source/lexbor/css/syntax/syntax.c +67 -0
  163. data/vendor/lexbor/source/lexbor/css/syntax/token.c +618 -0
  164. data/vendor/lexbor/source/lexbor/css/syntax/token.h +298 -0
  165. data/vendor/lexbor/source/lexbor/css/syntax/token_res.h +68 -0
  166. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.c +30 -0
  167. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.h +58 -0
  168. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.c +278 -0
  169. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.h +121 -0
  170. data/vendor/lexbor/source/lexbor/dom/base.h +32 -0
  171. data/vendor/lexbor/source/lexbor/dom/collection.c +97 -0
  172. data/vendor/lexbor/source/lexbor/dom/collection.h +112 -0
  173. data/vendor/lexbor/source/lexbor/dom/config.cmake +3 -0
  174. data/vendor/lexbor/source/lexbor/dom/dom.h +29 -0
  175. data/vendor/lexbor/source/lexbor/dom/exception.c +18 -0
  176. data/vendor/lexbor/source/lexbor/dom/exception.h +73 -0
  177. data/vendor/lexbor/source/lexbor/dom/interface.c +110 -0
  178. data/vendor/lexbor/source/lexbor/dom/interface.h +88 -0
  179. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.c +445 -0
  180. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.h +152 -0
  181. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_const.h +62 -0
  182. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_res.h +143 -0
  183. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.c +55 -0
  184. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.h +38 -0
  185. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.c +110 -0
  186. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.h +51 -0
  187. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.c +64 -0
  188. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.h +42 -0
  189. data/vendor/lexbor/source/lexbor/dom/interfaces/document.c +536 -0
  190. data/vendor/lexbor/source/lexbor/dom/interfaces/document.h +243 -0
  191. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.c +36 -0
  192. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.h +36 -0
  193. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.c +125 -0
  194. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.h +108 -0
  195. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +1411 -0
  196. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +319 -0
  197. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.c +32 -0
  198. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.h +34 -0
  199. data/vendor/lexbor/source/lexbor/dom/interfaces/node.c +661 -0
  200. data/vendor/lexbor/source/lexbor/dom/interfaces/node.h +192 -0
  201. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.c +87 -0
  202. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.h +66 -0
  203. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.c +36 -0
  204. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h +44 -0
  205. data/vendor/lexbor/source/lexbor/dom/interfaces/text.c +63 -0
  206. data/vendor/lexbor/source/lexbor/dom/interfaces/text.h +42 -0
  207. data/vendor/lexbor/source/lexbor/encoding/base.h +218 -0
  208. data/vendor/lexbor/source/lexbor/encoding/big5.c +42839 -0
  209. data/vendor/lexbor/source/lexbor/encoding/config.cmake +12 -0
  210. data/vendor/lexbor/source/lexbor/encoding/const.h +65 -0
  211. data/vendor/lexbor/source/lexbor/encoding/decode.c +3193 -0
  212. data/vendor/lexbor/source/lexbor/encoding/decode.h +370 -0
  213. data/vendor/lexbor/source/lexbor/encoding/encode.c +1931 -0
  214. data/vendor/lexbor/source/lexbor/encoding/encode.h +377 -0
  215. data/vendor/lexbor/source/lexbor/encoding/encoding.c +252 -0
  216. data/vendor/lexbor/source/lexbor/encoding/encoding.h +475 -0
  217. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +53883 -0
  218. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +47905 -0
  219. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +159 -0
  220. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +22477 -0
  221. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +15787 -0
  222. data/vendor/lexbor/source/lexbor/encoding/multi.h +53 -0
  223. data/vendor/lexbor/source/lexbor/encoding/range.c +71 -0
  224. data/vendor/lexbor/source/lexbor/encoding/range.h +34 -0
  225. data/vendor/lexbor/source/lexbor/encoding/res.c +222 -0
  226. data/vendor/lexbor/source/lexbor/encoding/res.h +34 -0
  227. data/vendor/lexbor/source/lexbor/encoding/single.c +13748 -0
  228. data/vendor/lexbor/source/lexbor/encoding/single.h +116 -0
  229. data/vendor/lexbor/source/lexbor/html/base.h +44 -0
  230. data/vendor/lexbor/source/lexbor/html/config.cmake +3 -0
  231. data/vendor/lexbor/source/lexbor/html/encoding.c +574 -0
  232. data/vendor/lexbor/source/lexbor/html/encoding.h +106 -0
  233. data/vendor/lexbor/source/lexbor/html/html.h +107 -0
  234. data/vendor/lexbor/source/lexbor/html/interface.c +165 -0
  235. data/vendor/lexbor/source/lexbor/html/interface.h +186 -0
  236. data/vendor/lexbor/source/lexbor/html/interface_res.h +4449 -0
  237. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.c +36 -0
  238. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.h +34 -0
  239. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.c +36 -0
  240. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.h +34 -0
  241. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.c +36 -0
  242. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.h +34 -0
  243. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.c +36 -0
  244. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.h +34 -0
  245. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.c +36 -0
  246. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.h +34 -0
  247. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.c +36 -0
  248. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.h +34 -0
  249. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.c +36 -0
  250. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.h +34 -0
  251. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.c +36 -0
  252. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.h +34 -0
  253. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.c +36 -0
  254. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.h +34 -0
  255. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.c +36 -0
  256. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.h +34 -0
  257. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.c +36 -0
  258. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.h +34 -0
  259. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.c +36 -0
  260. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.h +34 -0
  261. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.c +36 -0
  262. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.h +34 -0
  263. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.c +36 -0
  264. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.h +34 -0
  265. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.c +36 -0
  266. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.h +34 -0
  267. data/vendor/lexbor/source/lexbor/html/interfaces/document.c +444 -0
  268. data/vendor/lexbor/source/lexbor/html/interfaces/document.h +256 -0
  269. data/vendor/lexbor/source/lexbor/html/interfaces/element.c +64 -0
  270. data/vendor/lexbor/source/lexbor/html/interfaces/element.h +54 -0
  271. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.c +36 -0
  272. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.h +34 -0
  273. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.c +36 -0
  274. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.h +34 -0
  275. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.c +36 -0
  276. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.h +34 -0
  277. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.c +36 -0
  278. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.h +34 -0
  279. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.c +36 -0
  280. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.h +34 -0
  281. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.c +36 -0
  282. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.h +34 -0
  283. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.c +36 -0
  284. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.h +34 -0
  285. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.c +36 -0
  286. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.h +34 -0
  287. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.c +36 -0
  288. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.h +34 -0
  289. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.c +36 -0
  290. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.h +34 -0
  291. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.c +36 -0
  292. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.h +34 -0
  293. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.c +36 -0
  294. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.h +34 -0
  295. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.c +36 -0
  296. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.h +34 -0
  297. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.c +36 -0
  298. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.h +34 -0
  299. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.c +36 -0
  300. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.h +34 -0
  301. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.c +36 -0
  302. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.h +34 -0
  303. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.c +36 -0
  304. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.h +34 -0
  305. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.c +36 -0
  306. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.h +34 -0
  307. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.c +36 -0
  308. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.h +34 -0
  309. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.c +36 -0
  310. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.h +34 -0
  311. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.c +36 -0
  312. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.h +34 -0
  313. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.c +36 -0
  314. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.h +34 -0
  315. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.c +36 -0
  316. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.h +34 -0
  317. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.c +36 -0
  318. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.h +34 -0
  319. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.c +36 -0
  320. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.h +34 -0
  321. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.c +36 -0
  322. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.h +34 -0
  323. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.c +36 -0
  324. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.h +34 -0
  325. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.c +36 -0
  326. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.h +34 -0
  327. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.c +36 -0
  328. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.h +34 -0
  329. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.c +36 -0
  330. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.h +34 -0
  331. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.c +36 -0
  332. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.h +34 -0
  333. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.c +36 -0
  334. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.h +34 -0
  335. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.c +36 -0
  336. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.h +34 -0
  337. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.c +36 -0
  338. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.h +34 -0
  339. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.c +36 -0
  340. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.h +34 -0
  341. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.c +36 -0
  342. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.h +34 -0
  343. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +36 -0
  344. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.h +34 -0
  345. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.c +36 -0
  346. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.h +34 -0
  347. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.c +36 -0
  348. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.h +34 -0
  349. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.c +36 -0
  350. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.h +34 -0
  351. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.c +36 -0
  352. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.h +34 -0
  353. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.c +36 -0
  354. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.h +34 -0
  355. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.c +36 -0
  356. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.h +34 -0
  357. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.c +36 -0
  358. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.h +34 -0
  359. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.c +36 -0
  360. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.h +34 -0
  361. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.c +36 -0
  362. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.h +34 -0
  363. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.c +36 -0
  364. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.h +34 -0
  365. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.c +46 -0
  366. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.h +38 -0
  367. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.c +36 -0
  368. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.h +34 -0
  369. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.c +36 -0
  370. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.h +34 -0
  371. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.c +133 -0
  372. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.h +42 -0
  373. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.c +36 -0
  374. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.h +34 -0
  375. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.c +36 -0
  376. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.h +34 -0
  377. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.c +36 -0
  378. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.h +34 -0
  379. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.c +36 -0
  380. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.h +34 -0
  381. data/vendor/lexbor/source/lexbor/html/interfaces/window.c +36 -0
  382. data/vendor/lexbor/source/lexbor/html/interfaces/window.h +34 -0
  383. data/vendor/lexbor/source/lexbor/html/node.c +14 -0
  384. data/vendor/lexbor/source/lexbor/html/node.h +67 -0
  385. data/vendor/lexbor/source/lexbor/html/parser.c +469 -0
  386. data/vendor/lexbor/source/lexbor/html/parser.h +170 -0
  387. data/vendor/lexbor/source/lexbor/html/serialize.c +1510 -0
  388. data/vendor/lexbor/source/lexbor/html/serialize.h +93 -0
  389. data/vendor/lexbor/source/lexbor/html/tag.h +103 -0
  390. data/vendor/lexbor/source/lexbor/html/tag_res.h +2262 -0
  391. data/vendor/lexbor/source/lexbor/html/token.c +386 -0
  392. data/vendor/lexbor/source/lexbor/html/token.h +130 -0
  393. data/vendor/lexbor/source/lexbor/html/token_attr.c +44 -0
  394. data/vendor/lexbor/source/lexbor/html/token_attr.h +67 -0
  395. data/vendor/lexbor/source/lexbor/html/tokenizer/error.c +28 -0
  396. data/vendor/lexbor/source/lexbor/html/tokenizer/error.h +141 -0
  397. data/vendor/lexbor/source/lexbor/html/tokenizer/res.h +4956 -0
  398. data/vendor/lexbor/source/lexbor/html/tokenizer/state.c +2171 -0
  399. data/vendor/lexbor/source/lexbor/html/tokenizer/state.h +225 -0
  400. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.c +489 -0
  401. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.h +27 -0
  402. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.c +1654 -0
  403. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.h +27 -0
  404. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.c +303 -0
  405. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.h +32 -0
  406. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.c +311 -0
  407. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.h +32 -0
  408. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.c +1209 -0
  409. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.h +32 -0
  410. data/vendor/lexbor/source/lexbor/html/tokenizer.c +499 -0
  411. data/vendor/lexbor/source/lexbor/html/tokenizer.h +343 -0
  412. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.c +241 -0
  413. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.h +117 -0
  414. data/vendor/lexbor/source/lexbor/html/tree/error.c +26 -0
  415. data/vendor/lexbor/source/lexbor/html/tree/error.h +114 -0
  416. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_body.c +62 -0
  417. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_frameset.c +63 -0
  418. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_body.c +82 -0
  419. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_frameset.c +88 -0
  420. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_head.c +222 -0
  421. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_head.c +144 -0
  422. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_html.c +166 -0
  423. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/foreign_content.c +358 -0
  424. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1974 -0
  425. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_caption.c +158 -0
  426. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_cell.c +187 -0
  427. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_column_group.c +194 -0
  428. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_frameset.c +149 -0
  429. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head.c +374 -0
  430. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head_noscript.c +121 -0
  431. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_row.c +211 -0
  432. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select.c +341 -0
  433. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select_in_table.c +115 -0
  434. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table.c +451 -0
  435. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_body.c +208 -0
  436. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_text.c +127 -0
  437. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_template.c +189 -0
  438. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/initial.c +411 -0
  439. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/text.c +61 -0
  440. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode.h +135 -0
  441. data/vendor/lexbor/source/lexbor/html/tree/open_elements.c +251 -0
  442. data/vendor/lexbor/source/lexbor/html/tree/open_elements.h +105 -0
  443. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.c +10 -0
  444. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.h +100 -0
  445. data/vendor/lexbor/source/lexbor/html/tree.c +1726 -0
  446. data/vendor/lexbor/source/lexbor/html/tree.h +431 -0
  447. data/vendor/lexbor/source/lexbor/html/tree_res.h +111 -0
  448. data/vendor/lexbor/source/lexbor/ns/base.h +32 -0
  449. data/vendor/lexbor/source/lexbor/ns/config.cmake +2 -0
  450. data/vendor/lexbor/source/lexbor/ns/const.h +37 -0
  451. data/vendor/lexbor/source/lexbor/ns/ns.c +154 -0
  452. data/vendor/lexbor/source/lexbor/ns/ns.h +66 -0
  453. data/vendor/lexbor/source/lexbor/ns/res.h +97 -0
  454. data/vendor/lexbor/source/lexbor/ports/posix/config.cmake +11 -0
  455. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/fs.c +236 -0
  456. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +33 -0
  457. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/perf.c +158 -0
  458. data/vendor/lexbor/source/lexbor/ports/windows_nt/config.cmake +18 -0
  459. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/fs.c +239 -0
  460. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +33 -0
  461. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/perf.c +81 -0
  462. data/vendor/lexbor/source/lexbor/selectors/base.h +30 -0
  463. data/vendor/lexbor/source/lexbor/selectors/config.cmake +2 -0
  464. data/vendor/lexbor/source/lexbor/selectors/selectors.c +1591 -0
  465. data/vendor/lexbor/source/lexbor/selectors/selectors.h +71 -0
  466. data/vendor/lexbor/source/lexbor/tag/base.h +32 -0
  467. data/vendor/lexbor/source/lexbor/tag/config.cmake +2 -0
  468. data/vendor/lexbor/source/lexbor/tag/const.h +225 -0
  469. data/vendor/lexbor/source/lexbor/tag/res.h +562 -0
  470. data/vendor/lexbor/source/lexbor/tag/tag.c +144 -0
  471. data/vendor/lexbor/source/lexbor/tag/tag.h +123 -0
  472. data/vendor/lexbor/source/lexbor/utils/base.h +32 -0
  473. data/vendor/lexbor/source/lexbor/utils/config.cmake +2 -0
  474. data/vendor/lexbor/source/lexbor/utils/http.c +534 -0
  475. data/vendor/lexbor/source/lexbor/utils/http.h +90 -0
  476. data/vendor/lexbor/source/lexbor/utils/utils.h +15 -0
  477. data/vendor/lexbor/source/lexbor/utils/warc.c +817 -0
  478. data/vendor/lexbor/source/lexbor/utils/warc.h +126 -0
  479. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +231 -0
  480. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +21 -0
  481. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +26 -0
  482. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +49 -0
  483. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +54 -0
  484. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +36 -0
  485. data/vendor/lexbor/version +1 -0
  486. metadata +542 -0
@@ -0,0 +1,203 @@
1
+ /*
2
+ * xmlmemory.c: libxml memory allocator wrapper.
3
+ *
4
+ * daniel@veillard.com
5
+ */
6
+
7
+ #define IN_LIBXML
8
+ #include "libxml.h"
9
+
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <ctype.h>
13
+ #include <time.h>
14
+
15
+ /* #define DEBUG_MEMORY */
16
+
17
+ /**
18
+ * MEM_LIST:
19
+ *
20
+ * keep track of all allocated blocks for error reporting
21
+ * Always build the memory list !
22
+ */
23
+ #ifdef DEBUG_MEMORY_LOCATION
24
+ #ifndef MEM_LIST
25
+ #define MEM_LIST /* keep a list of all the allocated memory blocks */
26
+ #endif
27
+ #endif
28
+
29
+ #include "libxml/globals.h" /* must come before xmlmemory.h */
30
+ #include "libxml/xmlmemory.h"
31
+ #include "libxml/xmlerror.h"
32
+ #include "libxml/threads.h"
33
+
34
+ #include "private/memory.h"
35
+ #include "private/threads.h"
36
+
37
+ static unsigned long debugMemSize = 0;
38
+ static unsigned long debugMemBlocks = 0;
39
+ static unsigned long debugMaxMemSize = 0;
40
+ static xmlMutex xmlMemMutex;
41
+
42
+ void xmlMallocBreakpoint(void);
43
+
44
+ /************************************************************************
45
+ * *
46
+ * Macros, variables and associated types *
47
+ * *
48
+ ************************************************************************/
49
+
50
+ #if !defined(LIBXML_THREAD_ENABLED) && !defined(LIBXML_THREAD_ALLOC_ENABLED)
51
+ #ifdef xmlMalloc
52
+ #undef xmlMalloc
53
+ #endif
54
+ #ifdef xmlRealloc
55
+ #undef xmlRealloc
56
+ #endif
57
+ #ifdef xmlMemStrdup
58
+ #undef xmlMemStrdup
59
+ #endif
60
+ #endif
61
+
62
+ /*
63
+ * Each allocated block begins with a header containing bookkeeping information
64
+ */
65
+
66
+ #define MEMTAG 0x5aa5U
67
+
68
+ #define MALLOC_TYPE 1
69
+ #define REALLOC_TYPE 2
70
+ #define STRDUP_TYPE 3
71
+ #define MALLOC_ATOMIC_TYPE 4
72
+ #define REALLOC_ATOMIC_TYPE 5
73
+
74
+ typedef struct memnod { /* per-block header; NOTE(review): no function visible in this file reads or writes these fields */
75
+ unsigned int mh_tag; /* presumably holds MEMTAG for a valid header - TODO confirm */
76
+ unsigned int mh_type; /* presumably one of the *_TYPE constants defined above - TODO confirm */
77
+ unsigned long mh_number; /* presumably the allocation sequence number - TODO confirm */
78
+ size_t mh_size; /* presumably the size requested by the client - TODO confirm */
79
+ #ifdef MEM_LIST
80
+ struct memnod *mh_next; /* list links, only present when MEM_LIST is defined */
81
+ struct memnod *mh_prev;
82
+ #endif
83
+ const char *mh_file; /* presumably the source file of the allocation site - TODO confirm */
84
+ unsigned int mh_line; /* presumably the source line of the allocation site - TODO confirm */
85
+ } MEMHDR;
86
+
87
+
88
+ #ifdef SUN4
89
+ #define ALIGN_SIZE 16
90
+ #else
91
+ #define ALIGN_SIZE sizeof(double)
92
+ #endif
93
+ #define HDR_SIZE sizeof(MEMHDR)
94
+ #define RESERVE_SIZE (((HDR_SIZE + (ALIGN_SIZE-1)) \
95
+ / ALIGN_SIZE ) * ALIGN_SIZE)
96
+
97
+ #define MAX_SIZE_T ((size_t)-1)
98
+
99
+ #define CLIENT_2_HDR(a) ((void *) (((char *) (a)) - RESERVE_SIZE))
100
+ #define HDR_2_CLIENT(a) ((void *) (((char *) (a)) + RESERVE_SIZE))
101
+
102
+
103
+ static unsigned int block=0;
104
+ static unsigned int xmlMemStopAtBlock = 0;
105
+ static void *xmlMemTraceBlockAt = NULL;
106
+ #ifdef MEM_LIST
107
+ static MEMHDR *memlist = NULL;
108
+ #endif
109
+
110
+ static void debugmem_tag_error(void *addr);
111
+ #ifdef MEM_LIST
112
+ static void debugmem_list_add(MEMHDR *);
113
+ static void debugmem_list_delete(MEMHDR *);
114
+ #endif
115
+ #define Mem_Tag_Err(a) debugmem_tag_error(a);
116
+
117
+ #ifndef TEST_POINT
118
+ #define TEST_POINT
119
+ #endif
120
+
121
/**
 * xmlInitMemory:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Returns 0 in all cases.
 */
int
xmlInitMemory(void) {
    /* Delegate to the library-wide initializer. */
    xmlInitParser();

    return 0;
}
131
+
132
+ /**
133
+ * xmlInitMemoryInternal:
134
+ *
135
+ * Initialize the memory layer.
136
+ *
137
+ * Returns 0 on success
138
+ */
139
+ void
140
+ xmlInitMemoryInternal(void) {
141
+ char *breakpoint;
142
+ #ifdef DEBUG_MEMORY
143
+ xmlGenericError(xmlGenericErrorContext,
144
+ "xmlInitMemory()\n");
145
+ #endif
146
+ xmlInitMutex(&xmlMemMutex);
147
+
148
+ breakpoint = getenv("XML_MEM_BREAKPOINT");
149
+ if (breakpoint != NULL) {
150
+ sscanf(breakpoint, "%ud", &xmlMemStopAtBlock);
151
+ }
152
+ breakpoint = getenv("XML_MEM_TRACE");
153
+ if (breakpoint != NULL) {
154
+ sscanf(breakpoint, "%p", &xmlMemTraceBlockAt);
155
+ }
156
+
157
+ #ifdef DEBUG_MEMORY
158
+ xmlGenericError(xmlGenericErrorContext,
159
+ "xmlInitMemory() Ok\n");
160
+ #endif
161
+ }
162
+
163
+ /**
164
+ * xmlMemSetup:
165
+ * @freeFunc: the free() function to use
166
+ * @mallocFunc: the malloc() function to use
167
+ * @reallocFunc: the realloc() function to use
168
+ * @strdupFunc: the strdup() function to use
169
+ *
170
+ * Override the default memory access functions with a new set
171
+ * This has to be called before any other libxml routines !
172
+ *
173
+ * Should this be blocked if there was already some allocations
174
+ * done ?
175
+ *
176
+ * Returns 0 on success
177
+ */
178
+ int
179
+ xmlMemSetup(xmlFreeFunc freeFunc, xmlMallocFunc mallocFunc,
180
+ xmlReallocFunc reallocFunc, xmlStrdupFunc strdupFunc) {
181
+ #ifdef DEBUG_MEMORY
182
+ xmlGenericError(xmlGenericErrorContext,
183
+ "xmlMemSetup()\n");
184
+ #endif
185
+ if (freeFunc == NULL)
186
+ return(-1);
187
+ if (mallocFunc == NULL)
188
+ return(-1);
189
+ if (reallocFunc == NULL)
190
+ return(-1);
191
+ if (strdupFunc == NULL)
192
+ return(-1);
193
+ xmlFree = freeFunc;
194
+ xmlMalloc = mallocFunc;
195
+ xmlMallocAtomic = mallocFunc;
196
+ xmlRealloc = reallocFunc;
197
+ xmlMemStrdup = strdupFunc;
198
+ #ifdef DEBUG_MEMORY
199
+ xmlGenericError(xmlGenericErrorContext,
200
+ "xmlMemSetup() Ok\n");
201
+ #endif
202
+ return(0);
203
+ }
@@ -0,0 +1,127 @@
1
+ /*
2
+ * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
+ * implemented on top of the SAX interfaces
4
+ *
5
+ * References:
6
+ * The XML specification:
7
+ * http://www.w3.org/TR/REC-xml
8
+ * Original 1.0 version:
9
+ * http://www.w3.org/TR/1998/REC-xml-19980210
10
+ * XML second edition working draft
11
+ * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
+ *
13
+ * Okay this is a big file, the parser core is around 7000 lines, then it
14
+ * is followed by the progressive parser top routines, then the various
15
+ * high level APIs to call the parser and a few miscellaneous functions.
16
+ * A number of helper functions and deprecated ones have been moved to
17
+ * parserInternals.c to reduce this file size.
18
+ * As much as possible the functions are associated with their relative
19
+ * production in the XML specification. A few productions defining the
20
+ * different ranges of character are actually implemented either in
21
+ * parserInternals.h or parserInternals.c
22
+ * The DOM tree build is realized from the default SAX callbacks in
23
+ * the module SAX.c.
24
+ * The routines doing the validation checks are in valid.c and called either
25
+ * from the SAX callbacks or as standalone functions using a preparsed
26
+ * document.
27
+ *
28
+ * See Copyright for the status of this software.
29
+ *
30
+ * daniel@veillard.com
31
+ */
32
+
33
+ /* To avoid EBCDIC trouble when parsing on zOS */
34
+ #if defined(__MVS__)
35
+ #pragma convert("ISO8859-1")
36
+ #endif
37
+
38
+ #define IN_LIBXML
39
+ #include "libxml.h"
40
+
41
+ #if defined(_WIN32)
42
+ #define XML_DIR_SEP '\\'
43
+ #else
44
+ #define XML_DIR_SEP '/'
45
+ #endif
46
+
47
+ #include <stdlib.h>
48
+ #include <limits.h>
49
+ #include <string.h>
50
+ #include <stdarg.h>
51
+ #include <stddef.h>
52
+ #include <ctype.h>
53
+ #include <stdlib.h>
54
+ #include "libxml/xmlmemory.h"
55
+ #include "libxml/threads.h"
56
+ #include "libxml/globals.h"
57
+ #include "libxml/tree.h"
58
+ #include "libxml/parser.h"
59
+ #include "libxml/parserInternals.h"
60
+ #include "libxml/HTMLparser.h"
61
+ #include "libxml/valid.h"
62
+ #include "libxml/entities.h"
63
+ #include "libxml/xmlerror.h"
64
+ #include "libxml/encoding.h"
65
+ #include "libxml/xmlIO.h"
66
+ #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
67
+ #include "libxml/xpath.h"
68
+ #endif
69
+
70
+ #include "private/threads.h"
71
+ #include "private/enc.h"
72
+ #include "private/xpath.h"
73
+ #include "private/dict.h"
74
+ #include "private/memory.h"
75
+ #include "private/globals.h"
76
+
77
+ /************************************************************************
78
+ * *
79
+ * Miscellaneous *
80
+ * *
81
+ ************************************************************************/
82
+
83
+ static int xmlParserInitialized = 0;
84
+
85
+ /**
86
+ * xmlInitParser:
87
+ *
88
+ * Initialization function for the XML parser.
89
+ * This is not reentrant. Call once before processing in case of
90
+ * use in multithreaded programs.
91
+ */
92
+
93
+ void
94
+ xmlInitParser(void) {
95
+ /*
96
+ * Note that the initialization code must not make memory allocations.
97
+ */
98
+ if (xmlParserInitialized != 0)
99
+ return;
100
+
101
+ #ifdef LIBXML_THREAD_ENABLED
102
+ __xmlGlobalInitMutexLock();
103
+ if (xmlParserInitialized == 0) {
104
+ #endif
105
+ #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
106
+ if (xmlFree == free)
107
+ atexit(xmlCleanupParser);
108
+ #endif
109
+
110
+ xmlInitThreadsInternal();
111
+ xmlInitGlobalsInternal();
112
+ xmlInitMemoryInternal();
113
+ __xmlInitializeDict();
114
+ xmlInitEncodingInternal();
115
+ // xmlRegisterDefaultInputCallbacks();
116
+ // #ifdef LIBXML_OUTPUT_ENABLED
117
+ // xmlRegisterDefaultOutputCallbacks();
118
+ // #endif /* LIBXML_OUTPUT_ENABLED */
119
+ #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
120
+ xmlInitXPathInternal();
121
+ #endif
122
+ xmlParserInitialized = 1;
123
+ #ifdef LIBXML_THREAD_ENABLED
124
+ }
125
+ __xmlGlobalInitMutexUnlock();
126
+ #endif
127
+ }
@@ -0,0 +1,338 @@
1
+ /*
2
+ * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
+ * XML and HTML parsers.
4
+ *
5
+ * See Copyright for the status of this software.
6
+ *
7
+ * daniel@veillard.com
8
+ */
9
+
10
+ #define IN_LIBXML
11
+ #include "libxml.h"
12
+
13
+ #if defined(_WIN32)
14
+ #define XML_DIR_SEP '\\'
15
+ #else
16
+ #define XML_DIR_SEP '/'
17
+ #endif
18
+
19
+ #include <string.h>
20
+ #include <ctype.h>
21
+ #include <stdlib.h>
22
+
23
+ #include "libxml/xmlmemory.h"
24
+ #include "libxml/parser.h"
25
+ #include "libxml/parserInternals.h"
26
+ #include "libxml/globals.h"
27
+ #include "libxml/chvalid.h"
28
+
29
+ #define CUR(ctxt) ctxt->input->cur
30
+ #define END(ctxt) ctxt->input->end
31
+ #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
32
+
33
+ #include "private/buf.h"
34
+ #include "private/error.h"
35
+ #include "private/parser.h"
36
+
37
+ /**
38
+ * xmlErrMemory:
39
+ * @ctxt: an XML parser context
40
+ * @extra: extra information
41
+ *
42
+ * Handle a redefinition of attribute error
43
+ */
44
+ void
45
+ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
46
+ {
47
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
48
+ (ctxt->instate == XML_PARSER_EOF))
49
+ return;
50
+ if (ctxt != NULL) {
51
+ ctxt->errNo = XML_ERR_NO_MEMORY;
52
+ ctxt->instate = XML_PARSER_EOF;
53
+ ctxt->disableSAX = 1;
54
+ }
55
+ if (extra)
56
+ __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
57
+ XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
58
+ NULL, NULL, 0, 0,
59
+ "Memory allocation failed : %s\n", extra);
60
+ else
61
+ __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
62
+ XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
63
+ NULL, NULL, 0, 0, "Memory allocation failed\n");
64
+ }
65
+
66
+ /**
67
+ * __xmlErrEncoding:
68
+ * @ctxt: an XML parser context
69
+ * @xmlerr: the error number
70
+ * @msg: the error message
71
+ * @str1: an string info
72
+ * @str2: an string info
73
+ *
74
+ * Handle an encoding error
75
+ */
76
+ void
77
+ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
78
+ const char *msg, const xmlChar * str1, const xmlChar * str2)
79
+ {
80
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
81
+ (ctxt->instate == XML_PARSER_EOF))
82
+ return;
83
+ if (ctxt != NULL)
84
+ ctxt->errNo = xmlerr;
85
+ __xmlRaiseError(NULL, NULL, NULL,
86
+ ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
87
+ NULL, 0, (const char *) str1, (const char *) str2,
88
+ NULL, 0, 0, msg, str1, str2);
89
+ if (ctxt != NULL) {
90
+ ctxt->wellFormed = 0;
91
+ if (ctxt->recovery == 0)
92
+ ctxt->disableSAX = 1;
93
+ }
94
+ }
95
+
96
+ /**
97
+ * xmlErrInternal:
98
+ * @ctxt: an XML parser context
99
+ * @msg: the error message
100
+ * @str: error information
101
+ *
102
+ * Handle an internal error
103
+ */
104
+ static void LIBXML_ATTR_FORMAT(2,0)
105
+ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
106
+ {
107
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
108
+ (ctxt->instate == XML_PARSER_EOF))
109
+ return;
110
+ if (ctxt != NULL)
111
+ ctxt->errNo = XML_ERR_INTERNAL_ERROR;
112
+ __xmlRaiseError(NULL, NULL, NULL,
113
+ ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
114
+ XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
115
+ 0, 0, msg, str);
116
+ if (ctxt != NULL) {
117
+ ctxt->wellFormed = 0;
118
+ if (ctxt->recovery == 0)
119
+ ctxt->disableSAX = 1;
120
+ }
121
+ }
122
+
123
+ /**
124
+ * xmlErrEncodingInt:
125
+ * @ctxt: an XML parser context
126
+ * @error: the error number
127
+ * @msg: the error message
128
+ * @val: an integer value
129
+ *
130
+ * n encoding error
131
+ */
132
+ static void LIBXML_ATTR_FORMAT(3,0)
133
+ xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
134
+ const char *msg, int val)
135
+ {
136
+ if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137
+ (ctxt->instate == XML_PARSER_EOF))
138
+ return;
139
+ if (ctxt != NULL)
140
+ ctxt->errNo = error;
141
+ __xmlRaiseError(NULL, NULL, NULL,
142
+ ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
143
+ NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
144
+ if (ctxt != NULL) {
145
+ ctxt->wellFormed = 0;
146
+ if (ctxt->recovery == 0)
147
+ ctxt->disableSAX = 1;
148
+ }
149
+ }
150
+
151
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return 1;
    if (IS_IDEOGRAPHIC(c))
        return 1;
    return 0;
}
164
+
165
+ /**
166
+ * xmlCopyCharMultiByte:
167
+ * @out: pointer to an array of xmlChar
168
+ * @val: the char value
169
+ *
170
+ * append the char value in the array
171
+ *
172
+ * Returns the number of xmlChar written
173
+ */
174
+ int
175
+ xmlCopyCharMultiByte(xmlChar *out, int val) {
176
+ if ((out == NULL) || (val < 0)) return(0);
177
+ /*
178
+ * We are supposed to handle UTF8, check it's valid
179
+ * From rfc2044: encoding of the Unicode values on UTF-8:
180
+ *
181
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
182
+ * 0000 0000-0000 007F 0xxxxxxx
183
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
184
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
185
+ */
186
+ if (val >= 0x80) {
187
+ xmlChar *savedout = out;
188
+ int bits;
189
+ if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
190
+ else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
191
+ else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
192
+ else {
193
+ xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
194
+ "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
195
+ val);
196
+ return(0);
197
+ }
198
+ for ( ; bits >= 0; bits-= 6)
199
+ *out++= ((val >> bits) & 0x3F) | 0x80 ;
200
+ return (out - savedout);
201
+ }
202
+ *out = val;
203
+ return 1;
204
+ }
205
+
206
+ /**
207
+ * xmlCopyChar:
208
+ * @len: Ignored, compatibility
209
+ * @out: pointer to an array of xmlChar
210
+ * @val: the char value
211
+ *
212
+ * append the char value in the array
213
+ *
214
+ * Returns the number of xmlChar written
215
+ */
216
+
217
+ int
218
+ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
219
+ if ((out == NULL) || (val < 0)) return(0);
220
+ /* the len parameter is ignored */
221
+ if (val >= 0x80) {
222
+ return(xmlCopyCharMultiByte (out, val));
223
+ }
224
+ *out = val;
225
+ return 1;
226
+ }
227
+
228
+ /**
229
+ * xmlStringCurrentChar:
230
+ * @ctxt: the XML parser context
231
+ * @cur: pointer to the beginning of the char
232
+ * @len: pointer to the length of the char read
233
+ *
234
+ * The current char value, if using UTF-8 this may actually span multiple
235
+ * bytes in the input buffer.
236
+ *
237
+ * Returns the current char value and its length
238
+ */
239
+
240
+ int
241
+ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
242
+ {
243
+ if ((len == NULL) || (cur == NULL)) return(0);
244
+ if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
245
+ /*
246
+ * We are supposed to handle UTF8, check it's valid
247
+ * From rfc2044: encoding of the Unicode values on UTF-8:
248
+ *
249
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
250
+ * 0000 0000-0000 007F 0xxxxxxx
251
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
252
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
253
+ *
254
+ * Check for the 0x110000 limit too
255
+ */
256
+ unsigned char c;
257
+ unsigned int val;
258
+
259
+ c = *cur;
260
+ if (c & 0x80) {
261
+ if ((cur[1] & 0xc0) != 0x80)
262
+ goto encoding_error;
263
+ if ((c & 0xe0) == 0xe0) {
264
+
265
+ if ((cur[2] & 0xc0) != 0x80)
266
+ goto encoding_error;
267
+ if ((c & 0xf0) == 0xf0) {
268
+ if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
269
+ goto encoding_error;
270
+ /* 4-byte code */
271
+ *len = 4;
272
+ val = (cur[0] & 0x7) << 18;
273
+ val |= (cur[1] & 0x3f) << 12;
274
+ val |= (cur[2] & 0x3f) << 6;
275
+ val |= cur[3] & 0x3f;
276
+ } else {
277
+ /* 3-byte code */
278
+ *len = 3;
279
+ val = (cur[0] & 0xf) << 12;
280
+ val |= (cur[1] & 0x3f) << 6;
281
+ val |= cur[2] & 0x3f;
282
+ }
283
+ } else {
284
+ /* 2-byte code */
285
+ *len = 2;
286
+ val = (cur[0] & 0x1f) << 6;
287
+ val |= cur[1] & 0x3f;
288
+ }
289
+ if (!IS_CHAR(val)) {
290
+ xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
291
+ "Char 0x%X out of allowed range\n", val);
292
+ }
293
+ return (val);
294
+ } else {
295
+ /* 1-byte code */
296
+ *len = 1;
297
+ return (*cur);
298
+ }
299
+ }
300
+ /*
301
+ * Assume it's a fixed length encoding (1) with
302
+ * a compatible encoding for the ASCII set, since
303
+ * XML constructs only use < 128 chars
304
+ */
305
+ *len = 1;
306
+ return (*cur);
307
+ encoding_error:
308
+
309
+ /*
310
+ * An encoding problem may arise from a truncated input buffer
311
+ * splitting a character in the middle. In that case do not raise
312
+ * an error but return 0 to indicate an end of stream problem
313
+ */
314
+ if ((ctxt == NULL) || (ctxt->input == NULL) ||
315
+ (ctxt->input->end - ctxt->input->cur < 4)) {
316
+ *len = 0;
317
+ return(0);
318
+ }
319
+ /*
320
+ * If we detect an UTF8 error that probably mean that the
321
+ * input encoding didn't get properly advertised in the
322
+ * declaration header. Report the error and switch the encoding
323
+ * to ISO-Latin-1 (if you don't like this policy, just declare the
324
+ * encoding !)
325
+ */
326
+ {
327
+ char buffer[150];
328
+
329
+ snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
330
+ ctxt->input->cur[0], ctxt->input->cur[1],
331
+ ctxt->input->cur[2], ctxt->input->cur[3]);
332
+ __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
333
+ "Input is not proper UTF-8, indicate encoding !\n%s",
334
+ BAD_CAST buffer, NULL);
335
+ }
336
+ *len = 1;
337
+ return (*cur);
338
+ }