nokolexbor 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nokolexbor/config.h +186 -0
  3. data/ext/nokolexbor/extconf.rb +131 -0
  4. data/ext/nokolexbor/libxml/HTMLparser.h +320 -0
  5. data/ext/nokolexbor/libxml/SAX2.h +173 -0
  6. data/ext/nokolexbor/libxml/chvalid.h +230 -0
  7. data/ext/nokolexbor/libxml/debugXML.h +217 -0
  8. data/ext/nokolexbor/libxml/dict.h +81 -0
  9. data/ext/nokolexbor/libxml/encoding.h +232 -0
  10. data/ext/nokolexbor/libxml/entities.h +153 -0
  11. data/ext/nokolexbor/libxml/globals.h +529 -0
  12. data/ext/nokolexbor/libxml/hash.h +236 -0
  13. data/ext/nokolexbor/libxml/list.h +137 -0
  14. data/ext/nokolexbor/libxml/parser.h +1264 -0
  15. data/ext/nokolexbor/libxml/parserInternals.h +641 -0
  16. data/ext/nokolexbor/libxml/pattern.h +100 -0
  17. data/ext/nokolexbor/libxml/threads.h +94 -0
  18. data/ext/nokolexbor/libxml/tree.h +1315 -0
  19. data/ext/nokolexbor/libxml/uri.h +94 -0
  20. data/ext/nokolexbor/libxml/valid.h +448 -0
  21. data/ext/nokolexbor/libxml/xmlIO.h +369 -0
  22. data/ext/nokolexbor/libxml/xmlautomata.h +146 -0
  23. data/ext/nokolexbor/libxml/xmlerror.h +919 -0
  24. data/ext/nokolexbor/libxml/xmlexports.h +79 -0
  25. data/ext/nokolexbor/libxml/xmlmemory.h +226 -0
  26. data/ext/nokolexbor/libxml/xmlregexp.h +222 -0
  27. data/ext/nokolexbor/libxml/xmlstring.h +140 -0
  28. data/ext/nokolexbor/libxml/xmlversion.h +526 -0
  29. data/ext/nokolexbor/libxml/xpath.h +575 -0
  30. data/ext/nokolexbor/libxml/xpathInternals.h +632 -0
  31. data/ext/nokolexbor/libxml/xpointer.h +137 -0
  32. data/ext/nokolexbor/libxml.h +76 -0
  33. data/ext/nokolexbor/memory.c +39 -0
  34. data/ext/nokolexbor/nl_document.c +51 -0
  35. data/ext/nokolexbor/nl_node.c +790 -0
  36. data/ext/nokolexbor/nl_node_set.c +368 -0
  37. data/ext/nokolexbor/nl_xpath_context.c +200 -0
  38. data/ext/nokolexbor/nokolexbor.c +63 -0
  39. data/ext/nokolexbor/nokolexbor.h +37 -0
  40. data/ext/nokolexbor/private/buf.h +70 -0
  41. data/ext/nokolexbor/private/dict.h +11 -0
  42. data/ext/nokolexbor/private/enc.h +17 -0
  43. data/ext/nokolexbor/private/error.h +21 -0
  44. data/ext/nokolexbor/private/globals.h +9 -0
  45. data/ext/nokolexbor/private/memory.h +9 -0
  46. data/ext/nokolexbor/private/parser.h +27 -0
  47. data/ext/nokolexbor/private/string.h +9 -0
  48. data/ext/nokolexbor/private/threads.h +50 -0
  49. data/ext/nokolexbor/private/tree.h +18 -0
  50. data/ext/nokolexbor/private/xpath.h +7 -0
  51. data/ext/nokolexbor/timsort.h +601 -0
  52. data/ext/nokolexbor/xml_SAX2.c +80 -0
  53. data/ext/nokolexbor/xml_buf.c +363 -0
  54. data/ext/nokolexbor/xml_chvalid.c +334 -0
  55. data/ext/nokolexbor/xml_dict.c +1264 -0
  56. data/ext/nokolexbor/xml_encoding.c +124 -0
  57. data/ext/nokolexbor/xml_error.c +134 -0
  58. data/ext/nokolexbor/xml_globals.c +1085 -0
  59. data/ext/nokolexbor/xml_hash.c +1141 -0
  60. data/ext/nokolexbor/xml_memory.c +203 -0
  61. data/ext/nokolexbor/xml_parser.c +127 -0
  62. data/ext/nokolexbor/xml_parserInternals.c +338 -0
  63. data/ext/nokolexbor/xml_pattern.c +2375 -0
  64. data/ext/nokolexbor/xml_string.c +1051 -0
  65. data/ext/nokolexbor/xml_threads.c +881 -0
  66. data/ext/nokolexbor/xml_tree.c +148 -0
  67. data/ext/nokolexbor/xml_xpath.c +14743 -0
  68. data/lib/nokolexbor/attribute.rb +18 -0
  69. data/lib/nokolexbor/document.rb +6 -0
  70. data/lib/nokolexbor/node.rb +264 -0
  71. data/lib/nokolexbor/node_set.rb +124 -0
  72. data/lib/nokolexbor/version.rb +5 -0
  73. data/lib/nokolexbor/xpath_context.rb +14 -0
  74. data/lib/nokolexbor.rb +17 -0
  75. data/patches/0001-lexbor-support-text-pseudo-element.patch +137 -0
  76. data/patches/0002-lexbor-match-id-class-case-sensitive.patch +22 -0
  77. data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
  78. data/vendor/lexbor/CMakeLists.txt +331 -0
  79. data/vendor/lexbor/config.cmake +890 -0
  80. data/vendor/lexbor/feature.cmake +134 -0
  81. data/vendor/lexbor/source/lexbor/core/array.c +208 -0
  82. data/vendor/lexbor/source/lexbor/core/array.h +100 -0
  83. data/vendor/lexbor/source/lexbor/core/array_obj.c +216 -0
  84. data/vendor/lexbor/source/lexbor/core/array_obj.h +134 -0
  85. data/vendor/lexbor/source/lexbor/core/avl.c +442 -0
  86. data/vendor/lexbor/source/lexbor/core/avl.h +82 -0
  87. data/vendor/lexbor/source/lexbor/core/base.h +86 -0
  88. data/vendor/lexbor/source/lexbor/core/bst.c +468 -0
  89. data/vendor/lexbor/source/lexbor/core/bst.h +108 -0
  90. data/vendor/lexbor/source/lexbor/core/bst_map.c +238 -0
  91. data/vendor/lexbor/source/lexbor/core/bst_map.h +87 -0
  92. data/vendor/lexbor/source/lexbor/core/config.cmake +12 -0
  93. data/vendor/lexbor/source/lexbor/core/conv.c +203 -0
  94. data/vendor/lexbor/source/lexbor/core/conv.h +53 -0
  95. data/vendor/lexbor/source/lexbor/core/core.h +35 -0
  96. data/vendor/lexbor/source/lexbor/core/def.h +57 -0
  97. data/vendor/lexbor/source/lexbor/core/diyfp.c +153 -0
  98. data/vendor/lexbor/source/lexbor/core/diyfp.h +258 -0
  99. data/vendor/lexbor/source/lexbor/core/dobject.c +187 -0
  100. data/vendor/lexbor/source/lexbor/core/dobject.h +92 -0
  101. data/vendor/lexbor/source/lexbor/core/dtoa.c +404 -0
  102. data/vendor/lexbor/source/lexbor/core/dtoa.h +28 -0
  103. data/vendor/lexbor/source/lexbor/core/fs.h +60 -0
  104. data/vendor/lexbor/source/lexbor/core/hash.c +476 -0
  105. data/vendor/lexbor/source/lexbor/core/hash.h +218 -0
  106. data/vendor/lexbor/source/lexbor/core/in.c +267 -0
  107. data/vendor/lexbor/source/lexbor/core/in.h +172 -0
  108. data/vendor/lexbor/source/lexbor/core/lexbor.h +35 -0
  109. data/vendor/lexbor/source/lexbor/core/mem.c +228 -0
  110. data/vendor/lexbor/source/lexbor/core/mem.h +141 -0
  111. data/vendor/lexbor/source/lexbor/core/mraw.c +428 -0
  112. data/vendor/lexbor/source/lexbor/core/mraw.h +114 -0
  113. data/vendor/lexbor/source/lexbor/core/perf.h +45 -0
  114. data/vendor/lexbor/source/lexbor/core/plog.c +73 -0
  115. data/vendor/lexbor/source/lexbor/core/plog.h +102 -0
  116. data/vendor/lexbor/source/lexbor/core/print.c +168 -0
  117. data/vendor/lexbor/source/lexbor/core/print.h +39 -0
  118. data/vendor/lexbor/source/lexbor/core/sbst.h +59 -0
  119. data/vendor/lexbor/source/lexbor/core/serialize.c +27 -0
  120. data/vendor/lexbor/source/lexbor/core/serialize.h +32 -0
  121. data/vendor/lexbor/source/lexbor/core/shs.c +118 -0
  122. data/vendor/lexbor/source/lexbor/core/shs.h +82 -0
  123. data/vendor/lexbor/source/lexbor/core/str.c +617 -0
  124. data/vendor/lexbor/source/lexbor/core/str.h +247 -0
  125. data/vendor/lexbor/source/lexbor/core/str_res.h +369 -0
  126. data/vendor/lexbor/source/lexbor/core/strtod.c +326 -0
  127. data/vendor/lexbor/source/lexbor/core/strtod.h +28 -0
  128. data/vendor/lexbor/source/lexbor/core/types.h +39 -0
  129. data/vendor/lexbor/source/lexbor/core/utils.c +43 -0
  130. data/vendor/lexbor/source/lexbor/core/utils.h +36 -0
  131. data/vendor/lexbor/source/lexbor/css/base.h +44 -0
  132. data/vendor/lexbor/source/lexbor/css/config.cmake +2 -0
  133. data/vendor/lexbor/source/lexbor/css/css.h +25 -0
  134. data/vendor/lexbor/source/lexbor/css/log.c +336 -0
  135. data/vendor/lexbor/source/lexbor/css/log.h +103 -0
  136. data/vendor/lexbor/source/lexbor/css/node.h +29 -0
  137. data/vendor/lexbor/source/lexbor/css/parser.c +473 -0
  138. data/vendor/lexbor/source/lexbor/css/parser.h +368 -0
  139. data/vendor/lexbor/source/lexbor/css/selectors/base.h +48 -0
  140. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.c +91 -0
  141. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.h +66 -0
  142. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_const.h +109 -0
  143. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_res.h +302 -0
  144. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +279 -0
  145. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.h +85 -0
  146. data/vendor/lexbor/source/lexbor/css/selectors/selector.c +927 -0
  147. data/vendor/lexbor/source/lexbor/css/selectors/selector.h +200 -0
  148. data/vendor/lexbor/source/lexbor/css/selectors/selectors.c +340 -0
  149. data/vendor/lexbor/source/lexbor/css/selectors/selectors.h +137 -0
  150. data/vendor/lexbor/source/lexbor/css/selectors/state.c +1718 -0
  151. data/vendor/lexbor/source/lexbor/css/selectors/state.h +79 -0
  152. data/vendor/lexbor/source/lexbor/css/stylesheet.h +37 -0
  153. data/vendor/lexbor/source/lexbor/css/syntax/anb.c +443 -0
  154. data/vendor/lexbor/source/lexbor/css/syntax/anb.h +45 -0
  155. data/vendor/lexbor/source/lexbor/css/syntax/base.h +33 -0
  156. data/vendor/lexbor/source/lexbor/css/syntax/parser.c +9 -0
  157. data/vendor/lexbor/source/lexbor/css/syntax/parser.h +25 -0
  158. data/vendor/lexbor/source/lexbor/css/syntax/res.h +48 -0
  159. data/vendor/lexbor/source/lexbor/css/syntax/state.c +2603 -0
  160. data/vendor/lexbor/source/lexbor/css/syntax/state.h +140 -0
  161. data/vendor/lexbor/source/lexbor/css/syntax/state_res.h +273 -0
  162. data/vendor/lexbor/source/lexbor/css/syntax/syntax.c +67 -0
  163. data/vendor/lexbor/source/lexbor/css/syntax/token.c +618 -0
  164. data/vendor/lexbor/source/lexbor/css/syntax/token.h +298 -0
  165. data/vendor/lexbor/source/lexbor/css/syntax/token_res.h +68 -0
  166. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.c +30 -0
  167. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.h +58 -0
  168. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.c +278 -0
  169. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.h +121 -0
  170. data/vendor/lexbor/source/lexbor/dom/base.h +32 -0
  171. data/vendor/lexbor/source/lexbor/dom/collection.c +97 -0
  172. data/vendor/lexbor/source/lexbor/dom/collection.h +112 -0
  173. data/vendor/lexbor/source/lexbor/dom/config.cmake +3 -0
  174. data/vendor/lexbor/source/lexbor/dom/dom.h +29 -0
  175. data/vendor/lexbor/source/lexbor/dom/exception.c +18 -0
  176. data/vendor/lexbor/source/lexbor/dom/exception.h +73 -0
  177. data/vendor/lexbor/source/lexbor/dom/interface.c +110 -0
  178. data/vendor/lexbor/source/lexbor/dom/interface.h +88 -0
  179. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.c +445 -0
  180. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.h +152 -0
  181. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_const.h +62 -0
  182. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_res.h +143 -0
  183. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.c +55 -0
  184. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.h +38 -0
  185. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.c +110 -0
  186. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.h +51 -0
  187. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.c +64 -0
  188. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.h +42 -0
  189. data/vendor/lexbor/source/lexbor/dom/interfaces/document.c +536 -0
  190. data/vendor/lexbor/source/lexbor/dom/interfaces/document.h +243 -0
  191. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.c +36 -0
  192. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.h +36 -0
  193. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.c +125 -0
  194. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.h +108 -0
  195. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +1411 -0
  196. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +319 -0
  197. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.c +32 -0
  198. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.h +34 -0
  199. data/vendor/lexbor/source/lexbor/dom/interfaces/node.c +661 -0
  200. data/vendor/lexbor/source/lexbor/dom/interfaces/node.h +192 -0
  201. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.c +87 -0
  202. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.h +66 -0
  203. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.c +36 -0
  204. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h +44 -0
  205. data/vendor/lexbor/source/lexbor/dom/interfaces/text.c +63 -0
  206. data/vendor/lexbor/source/lexbor/dom/interfaces/text.h +42 -0
  207. data/vendor/lexbor/source/lexbor/encoding/base.h +218 -0
  208. data/vendor/lexbor/source/lexbor/encoding/big5.c +42839 -0
  209. data/vendor/lexbor/source/lexbor/encoding/config.cmake +12 -0
  210. data/vendor/lexbor/source/lexbor/encoding/const.h +65 -0
  211. data/vendor/lexbor/source/lexbor/encoding/decode.c +3193 -0
  212. data/vendor/lexbor/source/lexbor/encoding/decode.h +370 -0
  213. data/vendor/lexbor/source/lexbor/encoding/encode.c +1931 -0
  214. data/vendor/lexbor/source/lexbor/encoding/encode.h +377 -0
  215. data/vendor/lexbor/source/lexbor/encoding/encoding.c +252 -0
  216. data/vendor/lexbor/source/lexbor/encoding/encoding.h +475 -0
  217. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +53883 -0
  218. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +47905 -0
  219. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +159 -0
  220. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +22477 -0
  221. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +15787 -0
  222. data/vendor/lexbor/source/lexbor/encoding/multi.h +53 -0
  223. data/vendor/lexbor/source/lexbor/encoding/range.c +71 -0
  224. data/vendor/lexbor/source/lexbor/encoding/range.h +34 -0
  225. data/vendor/lexbor/source/lexbor/encoding/res.c +222 -0
  226. data/vendor/lexbor/source/lexbor/encoding/res.h +34 -0
  227. data/vendor/lexbor/source/lexbor/encoding/single.c +13748 -0
  228. data/vendor/lexbor/source/lexbor/encoding/single.h +116 -0
  229. data/vendor/lexbor/source/lexbor/html/base.h +44 -0
  230. data/vendor/lexbor/source/lexbor/html/config.cmake +3 -0
  231. data/vendor/lexbor/source/lexbor/html/encoding.c +574 -0
  232. data/vendor/lexbor/source/lexbor/html/encoding.h +106 -0
  233. data/vendor/lexbor/source/lexbor/html/html.h +107 -0
  234. data/vendor/lexbor/source/lexbor/html/interface.c +165 -0
  235. data/vendor/lexbor/source/lexbor/html/interface.h +186 -0
  236. data/vendor/lexbor/source/lexbor/html/interface_res.h +4449 -0
  237. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.c +36 -0
  238. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.h +34 -0
  239. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.c +36 -0
  240. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.h +34 -0
  241. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.c +36 -0
  242. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.h +34 -0
  243. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.c +36 -0
  244. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.h +34 -0
  245. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.c +36 -0
  246. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.h +34 -0
  247. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.c +36 -0
  248. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.h +34 -0
  249. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.c +36 -0
  250. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.h +34 -0
  251. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.c +36 -0
  252. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.h +34 -0
  253. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.c +36 -0
  254. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.h +34 -0
  255. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.c +36 -0
  256. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.h +34 -0
  257. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.c +36 -0
  258. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.h +34 -0
  259. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.c +36 -0
  260. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.h +34 -0
  261. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.c +36 -0
  262. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.h +34 -0
  263. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.c +36 -0
  264. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.h +34 -0
  265. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.c +36 -0
  266. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.h +34 -0
  267. data/vendor/lexbor/source/lexbor/html/interfaces/document.c +444 -0
  268. data/vendor/lexbor/source/lexbor/html/interfaces/document.h +256 -0
  269. data/vendor/lexbor/source/lexbor/html/interfaces/element.c +64 -0
  270. data/vendor/lexbor/source/lexbor/html/interfaces/element.h +54 -0
  271. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.c +36 -0
  272. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.h +34 -0
  273. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.c +36 -0
  274. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.h +34 -0
  275. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.c +36 -0
  276. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.h +34 -0
  277. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.c +36 -0
  278. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.h +34 -0
  279. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.c +36 -0
  280. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.h +34 -0
  281. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.c +36 -0
  282. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.h +34 -0
  283. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.c +36 -0
  284. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.h +34 -0
  285. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.c +36 -0
  286. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.h +34 -0
  287. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.c +36 -0
  288. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.h +34 -0
  289. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.c +36 -0
  290. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.h +34 -0
  291. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.c +36 -0
  292. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.h +34 -0
  293. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.c +36 -0
  294. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.h +34 -0
  295. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.c +36 -0
  296. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.h +34 -0
  297. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.c +36 -0
  298. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.h +34 -0
  299. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.c +36 -0
  300. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.h +34 -0
  301. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.c +36 -0
  302. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.h +34 -0
  303. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.c +36 -0
  304. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.h +34 -0
  305. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.c +36 -0
  306. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.h +34 -0
  307. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.c +36 -0
  308. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.h +34 -0
  309. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.c +36 -0
  310. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.h +34 -0
  311. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.c +36 -0
  312. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.h +34 -0
  313. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.c +36 -0
  314. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.h +34 -0
  315. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.c +36 -0
  316. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.h +34 -0
  317. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.c +36 -0
  318. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.h +34 -0
  319. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.c +36 -0
  320. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.h +34 -0
  321. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.c +36 -0
  322. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.h +34 -0
  323. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.c +36 -0
  324. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.h +34 -0
  325. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.c +36 -0
  326. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.h +34 -0
  327. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.c +36 -0
  328. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.h +34 -0
  329. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.c +36 -0
  330. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.h +34 -0
  331. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.c +36 -0
  332. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.h +34 -0
  333. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.c +36 -0
  334. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.h +34 -0
  335. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.c +36 -0
  336. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.h +34 -0
  337. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.c +36 -0
  338. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.h +34 -0
  339. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.c +36 -0
  340. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.h +34 -0
  341. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.c +36 -0
  342. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.h +34 -0
  343. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +36 -0
  344. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.h +34 -0
  345. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.c +36 -0
  346. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.h +34 -0
  347. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.c +36 -0
  348. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.h +34 -0
  349. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.c +36 -0
  350. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.h +34 -0
  351. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.c +36 -0
  352. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.h +34 -0
  353. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.c +36 -0
  354. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.h +34 -0
  355. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.c +36 -0
  356. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.h +34 -0
  357. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.c +36 -0
  358. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.h +34 -0
  359. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.c +36 -0
  360. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.h +34 -0
  361. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.c +36 -0
  362. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.h +34 -0
  363. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.c +36 -0
  364. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.h +34 -0
  365. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.c +46 -0
  366. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.h +38 -0
  367. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.c +36 -0
  368. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.h +34 -0
  369. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.c +36 -0
  370. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.h +34 -0
  371. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.c +133 -0
  372. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.h +42 -0
  373. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.c +36 -0
  374. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.h +34 -0
  375. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.c +36 -0
  376. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.h +34 -0
  377. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.c +36 -0
  378. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.h +34 -0
  379. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.c +36 -0
  380. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.h +34 -0
  381. data/vendor/lexbor/source/lexbor/html/interfaces/window.c +36 -0
  382. data/vendor/lexbor/source/lexbor/html/interfaces/window.h +34 -0
  383. data/vendor/lexbor/source/lexbor/html/node.c +14 -0
  384. data/vendor/lexbor/source/lexbor/html/node.h +67 -0
  385. data/vendor/lexbor/source/lexbor/html/parser.c +469 -0
  386. data/vendor/lexbor/source/lexbor/html/parser.h +170 -0
  387. data/vendor/lexbor/source/lexbor/html/serialize.c +1510 -0
  388. data/vendor/lexbor/source/lexbor/html/serialize.h +93 -0
  389. data/vendor/lexbor/source/lexbor/html/tag.h +103 -0
  390. data/vendor/lexbor/source/lexbor/html/tag_res.h +2262 -0
  391. data/vendor/lexbor/source/lexbor/html/token.c +386 -0
  392. data/vendor/lexbor/source/lexbor/html/token.h +130 -0
  393. data/vendor/lexbor/source/lexbor/html/token_attr.c +44 -0
  394. data/vendor/lexbor/source/lexbor/html/token_attr.h +67 -0
  395. data/vendor/lexbor/source/lexbor/html/tokenizer/error.c +28 -0
  396. data/vendor/lexbor/source/lexbor/html/tokenizer/error.h +141 -0
  397. data/vendor/lexbor/source/lexbor/html/tokenizer/res.h +4956 -0
  398. data/vendor/lexbor/source/lexbor/html/tokenizer/state.c +2171 -0
  399. data/vendor/lexbor/source/lexbor/html/tokenizer/state.h +225 -0
  400. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.c +489 -0
  401. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.h +27 -0
  402. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.c +1654 -0
  403. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.h +27 -0
  404. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.c +303 -0
  405. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.h +32 -0
  406. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.c +311 -0
  407. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.h +32 -0
  408. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.c +1209 -0
  409. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.h +32 -0
  410. data/vendor/lexbor/source/lexbor/html/tokenizer.c +499 -0
  411. data/vendor/lexbor/source/lexbor/html/tokenizer.h +343 -0
  412. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.c +241 -0
  413. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.h +117 -0
  414. data/vendor/lexbor/source/lexbor/html/tree/error.c +26 -0
  415. data/vendor/lexbor/source/lexbor/html/tree/error.h +114 -0
  416. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_body.c +62 -0
  417. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_frameset.c +63 -0
  418. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_body.c +82 -0
  419. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_frameset.c +88 -0
  420. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_head.c +222 -0
  421. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_head.c +144 -0
  422. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_html.c +166 -0
  423. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/foreign_content.c +358 -0
  424. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1974 -0
  425. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_caption.c +158 -0
  426. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_cell.c +187 -0
  427. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_column_group.c +194 -0
  428. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_frameset.c +149 -0
  429. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head.c +374 -0
  430. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head_noscript.c +121 -0
  431. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_row.c +211 -0
  432. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select.c +341 -0
  433. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select_in_table.c +115 -0
  434. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table.c +451 -0
  435. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_body.c +208 -0
  436. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_text.c +127 -0
  437. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_template.c +189 -0
  438. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/initial.c +411 -0
  439. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/text.c +61 -0
  440. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode.h +135 -0
  441. data/vendor/lexbor/source/lexbor/html/tree/open_elements.c +251 -0
  442. data/vendor/lexbor/source/lexbor/html/tree/open_elements.h +105 -0
  443. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.c +10 -0
  444. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.h +100 -0
  445. data/vendor/lexbor/source/lexbor/html/tree.c +1726 -0
  446. data/vendor/lexbor/source/lexbor/html/tree.h +431 -0
  447. data/vendor/lexbor/source/lexbor/html/tree_res.h +111 -0
  448. data/vendor/lexbor/source/lexbor/ns/base.h +32 -0
  449. data/vendor/lexbor/source/lexbor/ns/config.cmake +2 -0
  450. data/vendor/lexbor/source/lexbor/ns/const.h +37 -0
  451. data/vendor/lexbor/source/lexbor/ns/ns.c +154 -0
  452. data/vendor/lexbor/source/lexbor/ns/ns.h +66 -0
  453. data/vendor/lexbor/source/lexbor/ns/res.h +97 -0
  454. data/vendor/lexbor/source/lexbor/ports/posix/config.cmake +11 -0
  455. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/fs.c +236 -0
  456. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +33 -0
  457. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/perf.c +158 -0
  458. data/vendor/lexbor/source/lexbor/ports/windows_nt/config.cmake +18 -0
  459. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/fs.c +239 -0
  460. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +33 -0
  461. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/perf.c +81 -0
  462. data/vendor/lexbor/source/lexbor/selectors/base.h +30 -0
  463. data/vendor/lexbor/source/lexbor/selectors/config.cmake +2 -0
  464. data/vendor/lexbor/source/lexbor/selectors/selectors.c +1591 -0
  465. data/vendor/lexbor/source/lexbor/selectors/selectors.h +71 -0
  466. data/vendor/lexbor/source/lexbor/tag/base.h +32 -0
  467. data/vendor/lexbor/source/lexbor/tag/config.cmake +2 -0
  468. data/vendor/lexbor/source/lexbor/tag/const.h +225 -0
  469. data/vendor/lexbor/source/lexbor/tag/res.h +562 -0
  470. data/vendor/lexbor/source/lexbor/tag/tag.c +144 -0
  471. data/vendor/lexbor/source/lexbor/tag/tag.h +123 -0
  472. data/vendor/lexbor/source/lexbor/utils/base.h +32 -0
  473. data/vendor/lexbor/source/lexbor/utils/config.cmake +2 -0
  474. data/vendor/lexbor/source/lexbor/utils/http.c +534 -0
  475. data/vendor/lexbor/source/lexbor/utils/http.h +90 -0
  476. data/vendor/lexbor/source/lexbor/utils/utils.h +15 -0
  477. data/vendor/lexbor/source/lexbor/utils/warc.c +817 -0
  478. data/vendor/lexbor/source/lexbor/utils/warc.h +126 -0
  479. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +231 -0
  480. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +21 -0
  481. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +26 -0
  482. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +49 -0
  483. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +54 -0
  484. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +36 -0
  485. data/vendor/lexbor/version +1 -0
  486. metadata +542 -0
@@ -0,0 +1,2603 @@
1
+ /*
2
+ * Copyright (C) 2018-2020 Alexander Borisov
3
+ *
4
+ * Author: Alexander Borisov <borisov@lexbor.com>
5
+ */
6
+
7
+ #include <string.h>
8
+ #include <float.h>
9
+
10
+ #include "lexbor/core/utils.h"
11
+ #include "lexbor/core/strtod.h"
12
+
13
+ #include "lexbor/css/syntax/state.h"
14
+ #include "lexbor/css/syntax/tokenizer/error.h"
15
+
16
+ #define LXB_CSS_SYNTAX_RES_NAME_MAP
17
+ #include "lexbor/css/syntax/res.h"
18
+
19
+ #define LEXBOR_STR_RES_MAP_HEX
20
+ #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
21
+ #include "lexbor/core/str_res.h"
22
+
23
+
24
/*
 * Ask the tokenizer for more input, updating the caller's `_data`/`_end`
 * cursors in place.  On any status other than LXB_STATUS_OK the *enclosing
 * function* returns NULL, so this macro may only be used inside functions
 * that return a pointer.
 */
#define LXB_CSS_SYNTAX_NEXT_CHUNK(_tkz, _status, _data, _end)                  \
    do {                                                                       \
        (_status) = lxb_css_syntax_tokenizer_next_chunk((_tkz), &(_data),      \
                                                        &(_end));              \
        if ((_status) != LXB_STATUS_OK) {                                      \
            return NULL;                                                       \
        }                                                                      \
    }                                                                          \
    while (0)


/*
 * Append `_length` bytes starting at `_begin` to the tokenizer's string
 * buffer.  On failure the *enclosing function* returns NULL.
 */
#define LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, _length)          \
    do {                                                                       \
        (_status) = lxb_css_syntax_string_append((_tkz), (_begin), (_length)); \
        if ((_status) != LXB_STATUS_OK) {                                      \
            return NULL;                                                       \
        }                                                                      \
    }                                                                          \
    while (0)

/*
 * Append the half-open byte range [_begin, _end) to the string buffer.
 * Both bounds are parenthesised so that expression arguments such as
 * `data - 1` keep their intended meaning inside the subtraction.
 */
#define LXB_CSS_SYNTAX_STR_APPEND(_tkz, _status, _begin, _end)                 \
    LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, ((_end) - (_begin)))
45
+
46
+
47
+ lxb_status_t /* Called by LXB_CSS_SYNTAX_NEXT_CHUNK with the addresses of the caller's data/end cursors; not defined in this part of the file. */
48
+ lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz,
49
+ const lxb_char_t **data, const lxb_char_t **end);
50
+
51
+ lxb_status_t /* Non-static; the definition appears further down in this file. */
52
+ lxb_css_syntax_state_tokens_realloc(lxb_css_syntax_tokenizer_t *tkz);
53
+
54
+
55
+ static const lxb_char_t * /* Forward declaration; called by the '+' and '-' handlers below when a digit follows the sign. */
56
+ lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
57
+ lxb_css_syntax_token_t *token,
58
+ const lxb_char_t *data,
59
+ const lxb_char_t *end);
60
+
61
+ static const lxb_char_t * /* Forward declaration; the '+', '-' and '.' handlers below pass tkz->buffer as buf_start/buf_end. */
62
+ lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
63
+ lxb_css_syntax_token_t *token,
64
+ lxb_char_t *buf_start, lxb_char_t *buf_end,
65
+ const lxb_char_t *data, const lxb_char_t *end);
66
+
67
+ static const lxb_char_t * /* Forward declaration. */
68
+ lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
69
+ lxb_css_syntax_token_t *token,
70
+ const lxb_char_t *data,
71
+ const lxb_char_t *end);
72
+
73
+ static const lxb_char_t * /* Forward declaration; called by the '#' handler below once the token is known to be a hash. */
74
+ lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
75
+ lxb_css_syntax_token_t *token,
76
+ const lxb_char_t *data, const lxb_char_t *end);
77
+
78
+ static const lxb_char_t * /* Forward declaration. */
79
+ lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
80
+ const lxb_char_t *data, const lxb_char_t *end);
81
+
82
+ static const lxb_char_t * /* Forward declaration. */
83
+ lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
84
+ const lxb_char_t *data, const lxb_char_t *end);
85
+
86
+ static const lxb_char_t * /* Forward declaration; takes `end` by address (presumably so it can switch chunks - confirm at the definition). */
87
+ lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
88
+ const lxb_char_t *data, const lxb_char_t **end);
89
+
90
+ static const lxb_char_t * /* Forward declaration; called by the string handler below after a backslash. */
91
+ lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
92
+ const lxb_char_t *data, const lxb_char_t **end);
93
+
94
+
95
+ lxb_inline lxb_status_t
96
+ lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t *tkz, size_t upto)
97
+ {
98
+ size_t len = tkz->pos - tkz->start;
99
+ size_t size = (tkz->end - tkz->start) + upto;
100
+
101
+ lxb_char_t *tmp = lexbor_realloc(tkz->start, size);
102
+ if (tmp == NULL) {
103
+ tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
104
+ return tkz->status;
105
+ }
106
+
107
+ tkz->start = tmp;
108
+ tkz->pos = tmp + len;
109
+ tkz->end = tmp + size;
110
+
111
+ return LXB_STATUS_OK;
112
+ }
113
+
114
+ lxb_inline lxb_status_t
115
+ lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t *tkz,
116
+ const lxb_char_t *data, size_t length)
117
+ {
118
+ if ((size_t) (tkz->end - tkz->pos) <= length) {
119
+ if (lxb_css_syntax_string_realloc(tkz, length + 1024) != LXB_STATUS_OK) {
120
+ return tkz->status;
121
+ }
122
+ }
123
+
124
+ memcpy(tkz->pos, data, length);
125
+
126
+ tkz->pos += length;
127
+
128
+ return LXB_STATUS_OK;
129
+ }
130
+
131
+ lxb_inline lxb_status_t
132
+ lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t *tkz)
133
+ {
134
+ if (tkz->pos >= tkz->end) {
135
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
136
+ return tkz->status;
137
+ }
138
+ }
139
+
140
+ *tkz->pos = 0x00;
141
+
142
+ return LXB_STATUS_OK;
143
+ }
144
+
145
+
146
+ lxb_inline const lxb_char_t *
147
+ lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t *tkz,
148
+ lxb_css_syntax_token_t *token,
149
+ const lxb_char_t *data)
150
+ {
151
+ if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
152
+ return NULL;
153
+ }
154
+
155
+ lxb_css_syntax_token_string(token)->data = tkz->start;
156
+ lxb_css_syntax_token_string(token)->length = tkz->pos - tkz->start;
157
+
158
+ tkz->pos = tkz->start;
159
+
160
+ return data;
161
+ }
162
+
163
+ lxb_inline const lxb_char_t *
164
+ lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t *tkz,
165
+ lxb_css_syntax_token_t *token,
166
+ const lxb_char_t *data)
167
+ {
168
+ if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
169
+ return NULL;
170
+ }
171
+
172
+ lxb_css_syntax_token_dimension_string(token)->data = tkz->start;
173
+ lxb_css_syntax_token_dimension_string(token)->length = tkz->pos - tkz->start;
174
+
175
+ tkz->pos = tkz->start;
176
+
177
+ return data;
178
+ }
179
+
180
+ static lxb_css_syntax_token_t *
181
+ lxb_css_syntax_tokenizer_token_append(lxb_css_syntax_tokenizer_t *tkz)
182
+ {
183
+ if (tkz->prepared == NULL) {
184
+ if (tkz->last >= tkz->tokens_end) {
185
+ tkz->status = lxb_css_syntax_state_tokens_realloc(tkz);
186
+ if (tkz->status != LXB_STATUS_OK) {
187
+ return NULL;
188
+ }
189
+ }
190
+
191
+ tkz->prepared = tkz->last;
192
+ tkz->prepared->cloned = false;
193
+
194
+ return tkz->last++;
195
+ }
196
+
197
+ lxb_css_syntax_token_t *first;
198
+ size_t length = tkz->last - tkz->prepared;
199
+
200
+ if ((tkz->last + length) >= tkz->tokens_end) {
201
+ tkz->status = lxb_css_syntax_state_tokens_realloc(tkz);
202
+ if (tkz->status != LXB_STATUS_OK) {
203
+ return NULL;
204
+ }
205
+ }
206
+
207
+ first = tkz->prepared;
208
+
209
+ memmove(&first[1], first, length * sizeof(lxb_css_syntax_token_t));
210
+
211
+ tkz->last++;
212
+ first->cloned = false;
213
+
214
+ return first;
215
+ }
216
+
217
+ lxb_status_t
218
+ lxb_css_syntax_state_tokens_realloc(lxb_css_syntax_tokenizer_t *tkz)
219
+ {
220
+ lxb_css_syntax_token_t *tokens;
221
+
222
+ static const unsigned length = 64;
223
+ size_t new_length = (tkz->tokens_end - tkz->tokens_begin) + length;
224
+
225
+ tokens = lexbor_calloc(new_length, sizeof(lxb_css_syntax_token_t));
226
+ if (tokens == NULL) {
227
+ return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
228
+ }
229
+
230
+ memcpy(tokens, tkz->token, (tkz->last - tkz->token)
231
+ * sizeof(lxb_css_syntax_token_t));
232
+
233
+ if (tkz->prepared != NULL) {
234
+ tkz->prepared = tokens + (tkz->prepared - tkz->token);
235
+ }
236
+
237
+ tkz->token = tokens;
238
+ tkz->last = tokens + (tkz->last - tkz->tokens_begin);
239
+
240
+ lexbor_free(tkz->tokens_begin);
241
+
242
+ tkz->tokens_begin = tokens;
243
+ tkz->tokens_end = tokens + new_length;
244
+
245
+ return LXB_STATUS_OK;
246
+ }
247
+
248
+ /*
249
+ * Delim
250
+ */
251
+ lxb_inline lxb_css_syntax_token_t *
252
+ lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t *tkz,
253
+ const lxb_char_t *data,
254
+ const lxb_char_t *end, lxb_char_t ch)
255
+ {
256
+ lxb_css_syntax_token_t *delim;
257
+
258
+ delim = lxb_css_syntax_tokenizer_token_append(tkz);
259
+ if (delim == NULL) {
260
+ return NULL;
261
+ }
262
+
263
+ delim->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
264
+
265
+ lxb_css_syntax_token_base(delim)->begin = data;
266
+ lxb_css_syntax_token_base(delim)->end = end;
267
+ lxb_css_syntax_token_delim(delim)->character = ch;
268
+
269
+ return delim;
270
+ }
271
+
272
+ lxb_inline void
273
+ lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t *token, const lxb_char_t *begin,
274
+ const lxb_char_t *end, lxb_char_t ch)
275
+ {
276
+ lxb_css_syntax_token_delim(token)->character = ch;
277
+ lxb_css_syntax_token_base(token)->begin = begin;
278
+ lxb_css_syntax_token_base(token)->end = end;
279
+
280
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
281
+ }
282
+
283
+ const lxb_char_t *
284
+ lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
285
+ const lxb_char_t *data, const lxb_char_t *end)
286
+ {
287
+ lxb_css_syntax_state_delim_set(token, data, data + 1, *data);
288
+
289
+ return data + 1;
290
+ }
291
+
292
+ /*
293
+ * Comment
294
+ */
295
+ const lxb_char_t *
296
+ lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t *tkz,
297
+ lxb_css_syntax_token_t *token,
298
+ const lxb_char_t *data, const lxb_char_t *end)
299
+ {
300
+ lxb_status_t status;
301
+ const lxb_char_t *begin;
302
+
303
+ lxb_css_syntax_token_base(token)->begin = data;
304
+
305
+ /* Skip forward slash (/) */
306
+ data++;
307
+
308
+ if (data >= end) {
309
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
310
+ if (data >= end) {
311
+ goto delim;
312
+ }
313
+ }
314
+
315
+ /* U+002A ASTERISK (*) */
316
+ if (*data != 0x2A) {
317
+ goto delim;
318
+ }
319
+
320
+ begin = data + 1;
321
+
322
+ do {
323
+ data++;
324
+
325
+ if (data >= end) {
326
+ if (begin < data) {
327
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
328
+ }
329
+
330
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
331
+ if (data >= end) {
332
+ goto error;
333
+ }
334
+
335
+ begin = data;
336
+ }
337
+
338
+ switch (*data) {
339
+ case 0x00:
340
+ if (begin < data) {
341
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
342
+ }
343
+
344
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
345
+ lexbor_str_res_ansi_replacement_character,
346
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
347
+ begin = data + 1;
348
+ break;
349
+
350
+ case 0x0D:
351
+ data++;
352
+
353
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
354
+
355
+ tkz->pos[-1] = '\n';
356
+
357
+ if (data >= end) {
358
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
359
+ if (data >= end) {
360
+ goto error;
361
+ }
362
+ }
363
+
364
+ if (*data != 0x0A) {
365
+ data--;
366
+ }
367
+
368
+ begin = data + 1;
369
+ break;
370
+
371
+ case 0x0C:
372
+ if (begin < data) {
373
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
374
+ }
375
+
376
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
377
+ (lxb_char_t *) "\n", 1);
378
+ begin = data + 1;
379
+ break;
380
+
381
+ /* U+002A ASTERISK (*) */
382
+ case 0x2A:
383
+ data++;
384
+
385
+ if (data >= end) {
386
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
387
+
388
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
389
+ if (data >= end) {
390
+ goto error;
391
+ }
392
+
393
+ if (*data == 0x2F) {
394
+ tkz->pos--;
395
+ *tkz->pos = 0x00;
396
+
397
+ data++;
398
+
399
+ goto done;
400
+ }
401
+
402
+ begin = data;
403
+ }
404
+
405
+ /* U+002F Forward slash (/) */
406
+ if (*data == 0x2F) {
407
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, (data - 1));
408
+
409
+ data++;
410
+
411
+ goto done;
412
+ }
413
+
414
+ data--;
415
+ break;
416
+ }
417
+ }
418
+ while (true);
419
+
420
+ done:
421
+
422
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
423
+
424
+ lxb_css_syntax_token_base(token)->end = data;
425
+ return lxb_css_syntax_state_string_set(tkz, token, data);
426
+
427
+ delim:
428
+
429
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
430
+
431
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
432
+ lxb_css_syntax_token_delim(token)->character = '/';
433
+
434
+ return data;
435
+
436
+ error:
437
+
438
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
439
+
440
+ lxb_css_syntax_token_base(token)->end = data;
441
+
442
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
443
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINCO);
444
+
445
+ return lxb_css_syntax_state_string_set(tkz, token, data);
446
+ }
447
+
448
+ /*
449
+ * Whitespace
450
+ */
451
+ const lxb_char_t *
452
+ lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t *tkz,
453
+ lxb_css_syntax_token_t *token,
454
+ const lxb_char_t *data, const lxb_char_t *end)
455
+ {
456
+ lxb_status_t status;
457
+ const lxb_char_t *begin;
458
+
459
+ token->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
460
+
461
+ lxb_css_syntax_token_base(token)->begin = data;
462
+
463
+ begin = data;
464
+
465
+ do {
466
+ switch (*data) {
467
+ case 0x0D:
468
+ data++;
469
+
470
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
471
+
472
+ tkz->pos[-1] = '\n';
473
+
474
+ if (data >= end) {
475
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
476
+ if (data >= end) {
477
+ goto done;
478
+ }
479
+ }
480
+
481
+ if (*data != 0x0A) {
482
+ data--;
483
+ }
484
+
485
+ begin = data + 1;
486
+ break;
487
+
488
+ case 0x0C:
489
+ if (begin < data) {
490
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
491
+ }
492
+
493
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
494
+ (const lxb_char_t *) "\n", 1);
495
+ begin = data + 1;
496
+ break;
497
+
498
+ case 0x09:
499
+ case 0x20:
500
+ case 0x0A:
501
+ break;
502
+
503
+ default:
504
+ if (begin < data) {
505
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
506
+ }
507
+
508
+ lxb_css_syntax_token_base(token)->end = data;
509
+
510
+ return lxb_css_syntax_state_string_set(tkz, token, data);
511
+ }
512
+
513
+ data++;
514
+
515
+ if (data >= end) {
516
+ if (begin < data) {
517
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
518
+ }
519
+
520
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
521
+ if (data >= end) {
522
+ break;
523
+ }
524
+
525
+ begin = data;
526
+ }
527
+ }
528
+ while (true);
529
+
530
+ done:
531
+
532
+ lxb_css_syntax_token_base(token)->end = data;
533
+
534
+ return lxb_css_syntax_state_string_set(tkz, token, data);
535
+ }
536
+
537
+ /*
538
+ * String token for U+0022 Quotation Mark (") and U+0027 Apostrophe (')
539
+ */
540
+ const lxb_char_t *
541
+ lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
542
+ const lxb_char_t *data, const lxb_char_t *end)
543
+ {
544
+ lxb_char_t mark;
545
+ lxb_status_t status;
546
+ const lxb_char_t *begin;
547
+
548
+ lxb_css_syntax_token_base(token)->begin = data;
549
+
550
+ mark = *data++;
551
+ begin = data;
552
+
553
+ for (;; data++) {
554
+ if (data >= end) {
555
+ if (begin < data) {
556
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
557
+ }
558
+
559
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
560
+ if (data >= end) {
561
+ goto error;
562
+ }
563
+
564
+ begin = data;
565
+ }
566
+
567
+ switch (*data) {
568
+ case 0x00:
569
+ if (begin < data) {
570
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
571
+ }
572
+
573
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
574
+ lexbor_str_res_ansi_replacement_character,
575
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
576
+ begin = data + 1;
577
+ break;
578
+
579
+ /*
580
+ * U+000A LINE FEED
581
+ * U+000D CARRIAGE RETURN
582
+ * U+000C FORM FEED
583
+ */
584
+ case 0x0A:
585
+ case 0x0D:
586
+ case 0x0C:
587
+ if (begin < data) {
588
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
589
+ }
590
+
591
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
592
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_NEINST);
593
+
594
+ token->type = LXB_CSS_SYNTAX_TOKEN_BAD_STRING;
595
+
596
+ lxb_css_syntax_token_base(token)->end = data;
597
+
598
+ return lxb_css_syntax_state_string_set(tkz, token, data);
599
+
600
+ /* U+005C REVERSE SOLIDUS (\) */
601
+ case 0x5C:
602
+ if (begin < data) {
603
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
604
+ }
605
+
606
+ data++;
607
+
608
+ if (data >= end) {
609
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
610
+ if (data >= end) {
611
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
612
+ (const lxb_char_t *) "\\", 1);
613
+ goto error;
614
+ }
615
+ }
616
+
617
+ data = lxb_css_syntax_state_escaped_string(tkz, data, &end);
618
+ if (data == NULL) {
619
+ return NULL;
620
+ }
621
+
622
+ begin = data;
623
+
624
+ data--;
625
+ break;
626
+
627
+ default:
628
+ /* '"' or '\'' */
629
+ if (*data == mark) {
630
+ if (begin < data) {
631
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
632
+ }
633
+
634
+ token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
635
+
636
+ lxb_css_syntax_token_base(token)->end = ++data;
637
+
638
+ return lxb_css_syntax_state_string_set(tkz, token, data);
639
+ }
640
+
641
+ break;
642
+ }
643
+ }
644
+
645
+ return data;
646
+
647
+ error:
648
+
649
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
650
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINST);
651
+
652
+ token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
653
+
654
+ lxb_css_syntax_token_base(token)->end = data;
655
+
656
+ return lxb_css_syntax_state_string_set(tkz, token, data);
657
+ }
658
+
659
+ /*
660
+ * U+0023 NUMBER SIGN (#)
661
+ */
662
+ const lxb_char_t *
663
+ lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t *tkz,
664
+ lxb_css_syntax_token_t *token, const lxb_char_t *data,
665
+ const lxb_char_t *end)
666
+ {
667
+ lxb_char_t ch;
668
+ lxb_status_t status;
669
+ const lxb_char_t *begin;
670
+ lxb_css_syntax_token_t *delim;
671
+
672
+ lxb_css_syntax_token_base(token)->begin = data++;
673
+
674
+ if (data >= end) {
675
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
676
+ if (data >= end) {
677
+ goto delim;
678
+ }
679
+ }
680
+
681
+ if (lxb_css_syntax_res_name_map[*data] == 0x00) {
682
+ if (*data == 0x00) {
683
+ goto hash;
684
+ }
685
+
686
+ /* U+005C REVERSE SOLIDUS (\) */
687
+ if (*data != 0x5C) {
688
+ goto delim;
689
+ }
690
+
691
+ begin = data++;
692
+
693
+ if (data >= end) {
694
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
695
+ if (data >= end) {
696
+ goto push_delim;
697
+ }
698
+ }
699
+
700
+ ch = *data;
701
+
702
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
703
+ goto push_delim;
704
+ }
705
+
706
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
707
+ if (data == NULL) {
708
+ return NULL;
709
+ }
710
+ }
711
+
712
+ hash:
713
+
714
+ token->type = LXB_CSS_SYNTAX_TOKEN_HASH;
715
+
716
+ return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
717
+
718
+ push_delim:
719
+
720
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '\\');
721
+ if (delim == NULL) {
722
+ return NULL;
723
+ }
724
+
725
+ delim:
726
+
727
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
728
+
729
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
730
+ lxb_css_syntax_token_delim(token)->character = '#';
731
+
732
+ return data;
733
+ }
734
+
735
+ /*
736
+ * U+0028 LEFT PARENTHESIS (()
737
+ */
738
+ const lxb_char_t *
739
+ lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
740
+ const lxb_char_t *data, const lxb_char_t *end)
741
+ {
742
+ token->type = LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS;
743
+
744
+ lxb_css_syntax_token_base(token)->begin = data;
745
+ lxb_css_syntax_token_base(token)->end = ++data;
746
+
747
+ return data;
748
+ }
749
+
750
+ /*
751
+ * U+0029 RIGHT PARENTHESIS ())
752
+ */
753
+ const lxb_char_t *
754
+ lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
755
+ const lxb_char_t *data, const lxb_char_t *end)
756
+ {
757
+ token->type = LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS;
758
+
759
+ lxb_css_syntax_token_base(token)->begin = data;
760
+ lxb_css_syntax_token_base(token)->end = ++data;
761
+
762
+ return data;
763
+ }
764
+
765
+ /*
766
+ * U+002B PLUS SIGN (+)
767
+ */
768
+ const lxb_char_t *
769
+ lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t *tkz,
770
+ lxb_css_syntax_token_t *token,
771
+ const lxb_char_t *data, const lxb_char_t *end)
772
+ {
773
+ lxb_status_t status;
774
+
775
+ lxb_css_syntax_token_base(token)->begin = data++;
776
+
777
+ if (data >= end) {
778
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
779
+ if (data >= end) {
780
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
781
+
782
+ lxb_css_syntax_token_base(token)->end = data;
783
+ lxb_css_syntax_token_delim(token)->character = '+';
784
+
785
+ return data;
786
+ }
787
+ }
788
+
789
+ return lxb_css_syntax_state_plus_process(tkz, token, data, end);
790
+ }
791
+
792
+ const lxb_char_t *
793
+ lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t *tkz,
794
+ lxb_css_syntax_token_t *token,
795
+ const lxb_char_t *data, const lxb_char_t *end)
796
+ {
797
+ lxb_status_t status;
798
+ const lxb_char_t *begin;
799
+ lxb_css_syntax_token_t *delim;
800
+
801
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
802
+ if (*data >= 0x30 && *data <= 0x39) {
803
+ lxb_css_syntax_token_number(token)->have_sign = true;
804
+ return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
805
+ }
806
+
807
+ /* U+002E FULL STOP (.) */
808
+ if (*data == 0x2E) {
809
+ begin = data++;
810
+
811
+ if (data == end) {
812
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
813
+
814
+ if (data >= end || *data < 0x30 || *data > 0x39) {
815
+ goto push_delim;
816
+ }
817
+
818
+ lxb_css_syntax_token_number(token)->have_sign = true;
819
+
820
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
821
+ tkz->buffer + sizeof(tkz->buffer),
822
+ data, end);
823
+ }
824
+
825
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
826
+ if (*data >= 0x30 && *data <= 0x39) {
827
+ lxb_css_syntax_token_number(token)->have_sign = true;
828
+
829
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
830
+ tkz->buffer + sizeof(tkz->buffer),
831
+ data, end);
832
+ }
833
+
834
+ push_delim:
835
+
836
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '.');
837
+ if (delim == NULL) {
838
+ return NULL;
839
+ }
840
+ }
841
+
842
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
843
+
844
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
845
+ lxb_css_syntax_token_delim(token)->character = '+';
846
+
847
+ return data;
848
+ }
849
+
850
+ /*
851
+ * U+002C COMMA (,)
852
+ */
853
+ const lxb_char_t *
854
+ lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t *tkz,
855
+ lxb_css_syntax_token_t *token,
856
+ const lxb_char_t *data, const lxb_char_t *end)
857
+ {
858
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMA;
859
+
860
+ lxb_css_syntax_token_base(token)->begin = data;
861
+ lxb_css_syntax_token_base(token)->end = ++data;
862
+
863
+ return data;
864
+ }
865
+
866
+ /*
867
+ * U+002D HYPHEN-MINUS (-)
868
+ */
869
+ const lxb_char_t *
870
+ lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
871
+ const lxb_char_t *data, const lxb_char_t *end)
872
+ {
873
+ lxb_status_t status;
874
+
875
+ lxb_css_syntax_token_base(token)->begin = data++;
876
+
877
+ if (data >= end) {
878
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
879
+ if (data >= end) {
880
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
881
+
882
+ lxb_css_syntax_token_base(token)->end = data;
883
+ lxb_css_syntax_token_delim(token)->character = '-';
884
+
885
+ return data;
886
+ }
887
+ }
888
+
889
+ return lxb_css_syntax_state_minus_process(tkz, token, data, end);
890
+ }
891
+
892
+ const lxb_char_t *
893
+ lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz,
894
+ lxb_css_syntax_token_t *token,
895
+ const lxb_char_t *data, const lxb_char_t *end)
896
+ {
897
+ lxb_char_t ch;
898
+ lxb_status_t status;
899
+ const lxb_char_t *begin, *second;
900
+ lxb_css_syntax_token_t *delim;
901
+ lxb_css_syntax_token_number_t *number;
902
+
903
+ unsigned minuses_len = 1;
904
+ static const lxb_char_t minuses[3] = "---";
905
+
906
+ /* Check for <number-token> */
907
+
908
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
909
+ if (*data >= 0x30 && *data <= 0x39) {
910
+ data = lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
911
+
912
+ number = lxb_css_syntax_token_number(token);
913
+ number->num = -number->num;
914
+
915
+ lxb_css_syntax_token_number(token)->have_sign = true;
916
+
917
+ return data;
918
+ }
919
+
920
+ /* U+002E FULL STOP (.) */
921
+ if (*data == 0x2E) {
922
+ begin = data++;
923
+
924
+ if (data == end) {
925
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
926
+ if (data >= end) {
927
+ goto push_delim;
928
+ }
929
+ }
930
+
931
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
932
+ if (*data >= 0x30 && *data <= 0x39) {
933
+ data = lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
934
+ tkz->buffer + sizeof(tkz->buffer),
935
+ data, end);
936
+
937
+ number = lxb_css_syntax_token_number(token);
938
+ number->num = -number->num;
939
+
940
+ lxb_css_syntax_token_number(token)->have_sign = true;
941
+
942
+ return data;
943
+ }
944
+
945
+ push_delim:
946
+
947
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '.');
948
+ if (delim == NULL) {
949
+ return NULL;
950
+ }
951
+
952
+ goto delim;
953
+ }
954
+
955
+ second = data;
956
+
957
+ /* U+002D HYPHEN-MINUS (-) */
958
+ if (*data == 0x2D) {
959
+ data++;
960
+
961
+ /* Check for <CDC-token> */
962
+
963
+ if (data == end) {
964
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
965
+ if (data >= end) {
966
+ delim = lxb_css_syntax_list_append_delim(tkz, second,
967
+ second + 1, '-');
968
+ if (delim == NULL) {
969
+ return NULL;
970
+ }
971
+
972
+ goto delim;
973
+ }
974
+ }
975
+
976
+ if (*data == 0x2D) {
977
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 3);
978
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, ++data, end);
979
+ }
980
+ else if (*data == 0x3E) {
981
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDC;
982
+
983
+ lxb_css_syntax_token_base(token)->end = ++data;
984
+
985
+ return data;
986
+ }
987
+
988
+ minuses_len++;
989
+ }
990
+
991
+ /* Check for <ident-token> */
992
+
993
+ if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
994
+ || *data == 0x00)
995
+ {
996
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
997
+
998
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
999
+ }
1000
+
1001
+ /* U+005C REVERSE SOLIDUS (\) */
1002
+ if (*data == 0x5C) {
1003
+ begin = data++;
1004
+
1005
+ if (data == end) {
1006
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1007
+ if (data >= end) {
1008
+ goto delim_rev_solidus;
1009
+ }
1010
+
1011
+ ch = *data;
1012
+
1013
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1014
+ goto ident;
1015
+ }
1016
+
1017
+ goto delim_rev_solidus;
1018
+ }
1019
+
1020
+ ch = *data;
1021
+
1022
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1023
+ goto ident;
1024
+ }
1025
+
1026
+ delim_rev_solidus:
1027
+
1028
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '\\');
1029
+ if (delim == NULL) {
1030
+ return NULL;
1031
+ }
1032
+ }
1033
+
1034
+ if (minuses_len == 2) {
1035
+ delim = lxb_css_syntax_list_append_delim(tkz, second, NULL, '-');
1036
+ if (delim == NULL) {
1037
+ return NULL;
1038
+ }
1039
+ }
1040
+
1041
+ delim:
1042
+
1043
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1044
+
1045
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1046
+ lxb_css_syntax_token_delim(token)->character = '-';
1047
+
1048
+ return data;
1049
+
1050
+ ident:
1051
+
1052
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1053
+
1054
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1055
+ if (data == NULL) {
1056
+ return NULL;
1057
+ }
1058
+
1059
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1060
+ }
1061
+
1062
+ /*
1063
+ * U+002E FULL STOP (.)
1064
+ */
1065
+ const lxb_char_t *
1066
+ lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1067
+ const lxb_char_t *data, const lxb_char_t *end)
1068
+ {
1069
+ lxb_status_t status;
1070
+
1071
+ lxb_css_syntax_token_base(token)->begin = data;
1072
+ lxb_css_syntax_token_number(token)->have_sign = false;
1073
+
1074
+ data++;
1075
+
1076
+ if (data >= end) {
1077
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1078
+ if (data >= end) {
1079
+ goto delim;
1080
+ }
1081
+ }
1082
+
1083
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1084
+ if (*data >= 0x30 && *data <= 0x39) {
1085
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
1086
+ tkz->buffer + sizeof(tkz->buffer),
1087
+ data, end);
1088
+ }
1089
+
1090
+ delim:
1091
+
1092
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1093
+
1094
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1095
+ lxb_css_syntax_token_delim(token)->character = '.';
1096
+
1097
+ return data;
1098
+ }
1099
+
1100
+ /*
1101
+ * U+003A COLON (:)
1102
+ */
1103
+ const lxb_char_t *
1104
+ lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1105
+ const lxb_char_t *data, const lxb_char_t *end)
1106
+ {
1107
+ token->type = LXB_CSS_SYNTAX_TOKEN_COLON;
1108
+
1109
+ lxb_css_syntax_token_base(token)->begin = data;
1110
+ lxb_css_syntax_token_base(token)->end = ++data;
1111
+
1112
+ return data;
1113
+ }
1114
+
1115
+ /*
1116
+ * U+003B SEMICOLON (;)
1117
+ */
1118
+ const lxb_char_t *
1119
+ lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1120
+ const lxb_char_t *data, const lxb_char_t *end)
1121
+ {
1122
+ token->type = LXB_CSS_SYNTAX_TOKEN_SEMICOLON;
1123
+
1124
+ lxb_css_syntax_token_base(token)->begin = data;
1125
+ lxb_css_syntax_token_base(token)->end = ++data;
1126
+
1127
+ return data;
1128
+ }
1129
+
1130
+ /*
1131
+ * U+003C LESS-THAN SIGN (<)
1132
+ */
1133
+ const lxb_char_t *
1134
+ lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1135
+ const lxb_char_t *data, const lxb_char_t *end)
1136
+ {
1137
+ lxb_char_t ch;
1138
+ lxb_status_t status;
1139
+ const lxb_char_t *mark, *minus, *esc;
1140
+ lxb_css_syntax_token_t *delim, *ident;
1141
+
1142
+ lxb_css_syntax_token_base(token)->begin = data++;
1143
+
1144
+ if ((end - data) > 2) {
1145
+ if (data[0] == '!' && data[1] == '-' && data[2] == '-') {
1146
+ data += 3;
1147
+
1148
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1149
+ lxb_css_syntax_token_base(token)->end = data;
1150
+
1151
+ return data;
1152
+ }
1153
+
1154
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1155
+
1156
+ lxb_css_syntax_token_base(token)->end = data;
1157
+ lxb_css_syntax_token_delim(token)->character = '<';
1158
+
1159
+ return data;
1160
+ }
1161
+
1162
+ if (data >= end) {
1163
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1164
+ if (data >= end) {
1165
+ goto delim;
1166
+ }
1167
+ }
1168
+
1169
+ /* U+0021 EXCLAMATION MARK */
1170
+ if (*data != 0x21) {
1171
+ goto delim;
1172
+ }
1173
+
1174
+ mark = ++data;
1175
+
1176
+ if (data == end) {
1177
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1178
+ if (data >= end) {
1179
+ goto delim_mark;
1180
+ }
1181
+ }
1182
+
1183
+ /* U+002D HYPHEN-MINUS */
1184
+ if (*data != 0x2D) {
1185
+ goto delim_mark;
1186
+ }
1187
+
1188
+ minus = ++data;
1189
+
1190
+ if (data == end) {
1191
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1192
+ if (data >= end) {
1193
+ goto delim_minus;
1194
+ }
1195
+ }
1196
+
1197
+ /* U+002D HYPHEN-MINUS */
1198
+ if (*data == 0x2D) {
1199
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1200
+
1201
+ lxb_css_syntax_token_base(token)->end = ++data;
1202
+
1203
+ return data;
1204
+ }
1205
+
1206
+ if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
1207
+ goto ident;
1208
+ }
1209
+
1210
+ /* U+005C REVERSE SOLIDUS (\) */
1211
+ if (*data == 0x5C) {
1212
+ esc = data++;
1213
+
1214
+ if (data == end) {
1215
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1216
+ if (data >= end) {
1217
+ goto delim_esc;
1218
+ }
1219
+
1220
+ ch = *data;
1221
+
1222
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1223
+ ident = lxb_css_syntax_tokenizer_token_append(tkz);
1224
+ if (ident == NULL) {
1225
+ return NULL;
1226
+ }
1227
+
1228
+ lxb_css_syntax_token_base(ident)->begin = minus;
1229
+
1230
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1231
+ (const lxb_char_t *) "-", 1);
1232
+
1233
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1234
+ if (data == NULL) {
1235
+ return NULL;
1236
+ }
1237
+
1238
+ data = lxb_css_syntax_state_ident_like_not_url(tkz, ident,
1239
+ data, end);
1240
+ if (data == NULL) {
1241
+ return NULL;
1242
+ }
1243
+
1244
+ goto delim_mark;
1245
+ }
1246
+
1247
+ delim_esc:
1248
+
1249
+ delim = lxb_css_syntax_list_append_delim(tkz, esc, esc + 1, '\\');
1250
+ if (delim == NULL) {
1251
+ return NULL;
1252
+ }
1253
+
1254
+ goto delim_minus;
1255
+ }
1256
+
1257
+ ch = *data--;
1258
+
1259
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1260
+ goto delim_minus;
1261
+ }
1262
+
1263
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1264
+ if (data == NULL) {
1265
+ return NULL;
1266
+ }
1267
+ }
1268
+ else if (*data != 0x00) {
1269
+ delim = lxb_css_syntax_list_append_delim(tkz, minus - 1, NULL, '-');
1270
+ if (delim == NULL) {
1271
+ return NULL;
1272
+ }
1273
+
1274
+ goto delim_mark;
1275
+ }
1276
+
1277
+ ident:
1278
+
1279
+ ident = lxb_css_syntax_tokenizer_token_append(tkz);
1280
+ if (ident == NULL) {
1281
+ return NULL;
1282
+ }
1283
+
1284
+ lxb_css_syntax_token_base(ident)->begin = minus;
1285
+
1286
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, (const lxb_char_t *) "-", 1);
1287
+
1288
+ data = lxb_css_syntax_state_ident_like_not_url(tkz, ident, data, end);
1289
+ if (data == NULL) {
1290
+ return NULL;
1291
+ }
1292
+
1293
+ goto delim_mark;
1294
+
1295
+ delim_minus:
1296
+
1297
+ delim = lxb_css_syntax_list_append_delim(tkz, minus - 1, minus, '-');
1298
+ if (delim == NULL) {
1299
+ return NULL;
1300
+ }
1301
+
1302
+ delim_mark:
1303
+
1304
+ delim = lxb_css_syntax_list_append_delim(tkz, mark - 1, mark, '!');
1305
+ if (delim == NULL) {
1306
+ return NULL;
1307
+ }
1308
+
1309
+ delim:
1310
+
1311
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1312
+
1313
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1314
+ lxb_css_syntax_token_delim(token)->character = '<';
1315
+
1316
+ return data;
1317
+ }
1318
+
1319
/*
 * U+0040 COMMERCIAL AT (@)
 *
 * Tokenizer state entered with `data` pointing at the '@' byte.
 *
 * The token starts out as an at-keyword. It stays one when the bytes after
 * '@' can begin a name: a name-start byte or NUL (optionally preceded by one
 * '-'), a second '-', or a backslash that is not followed by LF/FF/CR
 * (optionally preceded by one '-'). In those cases the rest of the name is
 * read by lxb_css_syntax_state_consume_ident().
 *
 * Otherwise the token is turned into a '@' delim, and the '-' and '\' bytes
 * that were already stepped over are queued as separate delim tokens with
 * lxb_css_syntax_list_append_delim().
 *
 * Returns the position at which tokenizing continues, or NULL when one of
 * the helpers reports a failure.
 *
 * NOTE(review): LXB_CSS_SYNTAX_NEXT_CHUNK and LXB_CSS_SYNTAX_STR_APPEND_LEN
 * are macros defined outside this view. They receive `status`, so they
 * presumably leave the function on error -- confirm against their definition.
 */
const lxb_char_t *
lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
                        const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *minus, *esc;
    lxb_css_syntax_token_t *delim;

    /* Number of leading '-' bytes that still have to be emitted (0 or 1). */
    unsigned minuses_len = 0;
    static const lxb_char_t minuses[2] = "--";

    token->type = LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD;

    /* The token begins at '@'; `data` now points at the byte after it. */
    lxb_css_syntax_token_base(token)->begin = data++;

    if (data >= end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            /* Nothing follows '@': it is a lone delim. */
            goto delim;
        }
    }

    if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
        return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
    }

    /* Remembered so a rejected '-' can be reported as its own delim. */
    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (*data == 0x2D) {
        data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ends after "@-": emit '-' and then '@' as delims. */
                delim = lxb_css_syntax_list_append_delim(tkz, minus,
                                                         minus + 1, '-');
                if (delim == NULL) {
                    return NULL;
                }

                goto delim;
            }
        }

        if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
            || *data == 0x00)
        {
            /* "-" followed by a name start (or NUL): the name begins with "-". */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 1);
            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }
        else if (*data == 0x2D) {
            /* "--": both hyphens belong to the name; skip the second one. */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 2);
            return lxb_css_syntax_state_consume_ident(tkz, token, ++data, end);
        }

        /* One '-' is pending; what follows decides whether it is kept. */
        minuses_len++;
    }

    /* U+005C REVERSE SOLIDUS (\) */
    if (*data == 0x5C) {
        /* `esc` points just past the backslash. */
        esc = ++data;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_esc;
            }
        }

        ch = *data;

        /* A backslash followed by LF, FF or CR is not treated as an escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

            data = lxb_css_syntax_state_escaped(tkz, data, &end);
            if (data == NULL) {
                return NULL;
            }

            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }

        goto delim_esc;
    }
    else if (*data != 0x00) {
        /* Neither an escape nor NUL: no name can start here. */
        goto delim_minus;
    }

    /* Only NUL reaches this point; it is handed to consume_ident(). */
    LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

    return lxb_css_syntax_state_consume_ident(tkz, token, data, end);

delim_esc:

    delim = lxb_css_syntax_list_append_delim(tkz, esc - 1, esc, '\\');
    if (delim == NULL) {
        return NULL;
    }

delim_minus:

    if (minuses_len != 0) {
        delim = lxb_css_syntax_list_append_delim(tkz, minus, NULL, '-');
        if (delim == NULL) {
            return NULL;
        }
    }

delim:

    token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;

    /* The '@' delim covers exactly one byte. */
    lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
    lxb_css_syntax_token_delim(token)->character = '@';

    return data;
}
1441
+
1442
+ /*
1443
+ * U+005B LEFT SQUARE BRACKET ([)
1444
+ */
1445
+ const lxb_char_t *
1446
+ lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1447
+ const lxb_char_t *data, const lxb_char_t *end)
1448
+ {
1449
+ token->type = LXB_CSS_SYNTAX_TOKEN_LS_BRACKET;
1450
+
1451
+ lxb_css_syntax_token_base(token)->begin = data;
1452
+ lxb_css_syntax_token_base(token)->end = ++data;
1453
+
1454
+ return data;
1455
+ }
1456
+
1457
+ /*
1458
+ * U+005C REVERSE SOLIDUS (\)
1459
+ */
1460
+ const lxb_char_t *
1461
+ lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1462
+ const lxb_char_t *data, const lxb_char_t *end)
1463
+ {
1464
+ lxb_char_t ch;
1465
+ lxb_status_t status;
1466
+
1467
+ lxb_css_syntax_token_base(token)->begin = data++;
1468
+
1469
+ if (data >= end) {
1470
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1471
+ if (data >= end) {
1472
+ goto delim;
1473
+ }
1474
+ }
1475
+
1476
+ ch = *data;
1477
+
1478
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1479
+ goto delim;
1480
+ }
1481
+
1482
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1483
+ if (data == NULL) {
1484
+ return NULL;
1485
+ }
1486
+
1487
+ return lxb_css_syntax_state_ident_like(tkz, token, data, end);
1488
+
1489
+ delim:
1490
+
1491
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1492
+
1493
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1494
+ lxb_css_syntax_token_delim(token)->character = '\\';
1495
+
1496
+ return data;
1497
+ }
1498
+
1499
+ /*
1500
+ * U+005D RIGHT SQUARE BRACKET (])
1501
+ */
1502
+ const lxb_char_t *
1503
+ lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1504
+ const lxb_char_t *data, const lxb_char_t *end)
1505
+ {
1506
+ token->type = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET;
1507
+
1508
+ lxb_css_syntax_token_base(token)->begin = data;
1509
+ lxb_css_syntax_token_base(token)->end = ++data;
1510
+
1511
+ return data;
1512
+ }
1513
+
1514
+ /*
1515
+ * U+007B LEFT CURLY BRACKET ({)
1516
+ */
1517
+ const lxb_char_t *
1518
+ lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1519
+ const lxb_char_t *data, const lxb_char_t *end)
1520
+ {
1521
+ token->type = LXB_CSS_SYNTAX_TOKEN_LC_BRACKET;
1522
+
1523
+ lxb_css_syntax_token_base(token)->begin = data;
1524
+ lxb_css_syntax_token_base(token)->end = ++data;
1525
+
1526
+ return data;
1527
+ }
1528
+
1529
+ /*
1530
+ * U+007D RIGHT CURLY BRACKET (})
1531
+ */
1532
+ const lxb_char_t *
1533
+ lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1534
+ const lxb_char_t *data, const lxb_char_t *end)
1535
+ {
1536
+ token->type = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET;
1537
+
1538
+ lxb_css_syntax_token_base(token)->begin = data;
1539
+ lxb_css_syntax_token_base(token)->end = ++data;
1540
+
1541
+ return data;
1542
+ }
1543
+
1544
+ /*
1545
+ * Numeric
1546
+ */
1547
+ lxb_inline void
1548
+ lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t *tkz,
1549
+ lxb_css_syntax_token_t *token,
1550
+ const lxb_char_t *start, const lxb_char_t *end)
1551
+ {
1552
+ double num = lexbor_strtod_internal(start, (end - start), 0);
1553
+
1554
+ token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1555
+
1556
+ lxb_css_syntax_token_number(token)->is_float = false;
1557
+ lxb_css_syntax_token_number(token)->num = num;
1558
+ }
1559
+
1560
+ lxb_inline void
1561
+ lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t *tkz,
1562
+ lxb_css_syntax_token_t *token,
1563
+ const lxb_char_t *start, const lxb_char_t *end,
1564
+ bool e_is_negative, int exponent, int e_digit)
1565
+ {
1566
+ if (e_is_negative) {
1567
+ exponent -= e_digit;
1568
+ }
1569
+ else {
1570
+ exponent += e_digit;
1571
+ }
1572
+
1573
+ double num = lexbor_strtod_internal(start, (end - start), exponent);
1574
+
1575
+ token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1576
+
1577
+ lxb_css_syntax_token_number(token)->num = num;
1578
+ lxb_css_syntax_token_number(token)->is_float = true;
1579
+ }
1580
+
1581
+ const lxb_char_t *
1582
+ lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t *tkz,
1583
+ lxb_css_syntax_token_t *token,
1584
+ const lxb_char_t *data,
1585
+ const lxb_char_t *end)
1586
+ {
1587
+ lxb_css_syntax_token_base(token)->begin = data;
1588
+ lxb_css_syntax_token_number(token)->have_sign = false;
1589
+
1590
+ return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
1591
+ }
1592
+
1593
/*
 * Reads the integer part of a number.
 *
 * Digits are copied into tkz->buffer; once the buffer is full, further digits
 * are still consumed from the input but are not stored. After the digits:
 *   - anything but '.' finishes the value as an integer and lets
 *     lxb_css_syntax_state_consume_numeric_name_start() look for a unit or '%';
 *   - '.' followed by a digit continues in lxb_css_syntax_state_decimal();
 *   - '.' followed by anything else (or by the end of input) finishes the
 *     integer and queues the '.' as a separate delim token.
 *
 * Returns the position at which tokenizing continues, or NULL on failure.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
                                     lxb_css_syntax_token_t *token,
                                     const lxb_char_t *data,
                                     const lxb_char_t *end)
{
    lxb_status_t status;
    lxb_css_syntax_token_t *delim;

    /* [tkz->buffer, buf_start) holds the digits collected so far. */
    lxb_char_t *buf_start = tkz->buffer;
    lxb_char_t *buf_end = buf_start + sizeof(tkz->buffer);

    do {
        /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
        if (*data < 0x30 || *data > 0x39) {
            break;
        }

        /* Digits that no longer fit into the buffer are dropped. */
        if (buf_start != buf_end) {
            *buf_start++ = *data;
        }

        if (++data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ended inside the digits: the number is complete. */
                lxb_css_syntax_token_base(token)->end = data;

                lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
                                                       buf_start);
                return data;
            }
        }
    }
    while (true);

    lxb_css_syntax_token_base(token)->end = data;

    /* U+002E FULL STOP (.) */
    if (*data != 0x2E) {
        lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
                                               buf_start);

        return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
                                                               data, end);
    }

    data++;

    if (data == end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            goto delim;
        }
    }

    if (*data >= 0x30 && *data <= 0x39) {
        return lxb_css_syntax_state_decimal(tkz, token, buf_start, buf_end,
                                            data, end);
    }

delim:

    /* The '.' is not part of the number: finish the integer first. */
    lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, buf_start);

    delim = lxb_css_syntax_list_append_delim(tkz, data - 1, data, '.');
    if (delim == NULL) {
        return NULL;
    }

    return data;
}
1664
+
1665
+ static const lxb_char_t *
1666
+ lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
1667
+ lxb_css_syntax_token_t *token,
1668
+ lxb_char_t *buf_start, lxb_char_t *buf_end,
1669
+ const lxb_char_t *data, const lxb_char_t *end)
1670
+ {
1671
+ bool e_is_negative;
1672
+ int exponent, e_digit;
1673
+ lxb_char_t ch, by;
1674
+ lxb_status_t status;
1675
+ const lxb_char_t *last;
1676
+ lxb_css_syntax_token_t *delim, *t_str;
1677
+ lxb_css_syntax_token_string_t *str;
1678
+
1679
+ exponent = 0;
1680
+
1681
+ str = lxb_css_syntax_token_dimension_string(token);
1682
+ t_str = (lxb_css_syntax_token_t *) (void *) str;
1683
+
1684
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1685
+ do {
1686
+ if (buf_start != buf_end) {
1687
+ *buf_start++ = *data;
1688
+ exponent -= 1;
1689
+ }
1690
+
1691
+ data++;
1692
+
1693
+ if (data >= end) {
1694
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1695
+ if (data >= end) {
1696
+ lxb_css_syntax_token_base(token)->end = data;
1697
+
1698
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1699
+ buf_start, 0, exponent, 0);
1700
+ return data;
1701
+ }
1702
+ }
1703
+ }
1704
+ while (*data >= 0x30 && *data <= 0x39);
1705
+
1706
+ lxb_css_syntax_token_base(token)->end = data;
1707
+ lxb_css_syntax_token_base(str)->begin = data;
1708
+
1709
+ ch = *data;
1710
+
1711
+ /* U+0045 Latin Capital Letter (E) or U+0065 Latin Small Letter (e) */
1712
+ if (ch != 0x45 && ch != 0x65) {
1713
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1714
+ buf_start, 0, exponent, 0);
1715
+
1716
+ return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1717
+ data, end);
1718
+ }
1719
+
1720
+ e_digit = 0;
1721
+ e_is_negative = false;
1722
+
1723
+ lxb_css_syntax_token_base(str)->end = ++data;
1724
+
1725
+ if (data == end) {
1726
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1727
+ if (data >= end) {
1728
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1729
+ buf_start, 0, exponent, 0);
1730
+
1731
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1732
+
1733
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1734
+
1735
+ return lxb_css_syntax_state_dimension_set(tkz, token, data);
1736
+ }
1737
+ }
1738
+
1739
+ switch (*data) {
1740
+ /* U+002D HYPHEN-MINUS (-) */
1741
+ case 0x2D:
1742
+ e_is_negative = true;
1743
+ /* fall through */
1744
+
1745
+ /* U+002B PLUS SIGN (+) */
1746
+ case 0x2B:
1747
+ last = data++;
1748
+ by = *last;
1749
+
1750
+ if (data == end) {
1751
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1752
+ if (data >= end) {
1753
+ goto dimension;
1754
+ }
1755
+ }
1756
+
1757
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1758
+ if (*data < 0x30 || *data > 0x39) {
1759
+ goto dimension;
1760
+ }
1761
+
1762
+ break;
1763
+
1764
+ default:
1765
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1766
+ if (*data < 0x30 || *data > 0x39) {
1767
+ lxb_css_syntax_consume_numeric_set_float(tkz, token,
1768
+ tkz->buffer, buf_start,
1769
+ 0, exponent, 0);
1770
+
1771
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1772
+
1773
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1774
+
1775
+ return lxb_css_syntax_state_consume_ident(tkz, t_str,
1776
+ data, end);
1777
+ }
1778
+
1779
+ break;
1780
+ }
1781
+
1782
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1783
+ do {
1784
+ e_digit = (*data - 0x30) + e_digit * 0x0A;
1785
+
1786
+ if (++data == end) {
1787
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1788
+ if (data >= end) {
1789
+ lxb_css_syntax_token_base(token)->end = data;
1790
+
1791
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1792
+ e_is_negative, exponent, e_digit);
1793
+ return data;
1794
+ }
1795
+ }
1796
+ }
1797
+ while(*data >= 0x30 && *data <= 0x39);
1798
+
1799
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1800
+ e_is_negative, exponent, e_digit);
1801
+
1802
+ return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1803
+ data, end);
1804
+
1805
+ dimension:
1806
+
1807
+ lxb_css_syntax_consume_numeric_set_float(tkz, token,
1808
+ tkz->buffer, buf_start,
1809
+ 0, exponent, 0);
1810
+
1811
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1812
+
1813
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1814
+
1815
+ if (by == '-') {
1816
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &by, 1);
1817
+
1818
+ return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
1819
+ }
1820
+
1821
+ delim = lxb_css_syntax_list_append_delim(tkz, last, NULL, '+');
1822
+ if (delim == NULL) {
1823
+ return NULL;
1824
+ }
1825
+
1826
+ return lxb_css_syntax_state_dimension_set(tkz, token, data);
1827
+ }
1828
+
1829
/*
 * Decides what follows a number whose value is already stored in `token`.
 *
 *   - a name-start byte or NUL: the token becomes a dimension and the unit is
 *     read into the token's dimension string by
 *     lxb_css_syntax_state_consume_ident();
 *   - '%': the token becomes a percentage;
 *   - '-' followed by a name-start byte, another '-' or NUL: a dimension whose
 *     unit starts with '-';
 *   - a backslash (optionally after one '-') that is not followed by
 *     LF/FF/CR: a dimension whose unit starts with an escape;
 *   - anything else: the token is left as it is. A '-' or '\' that was
 *     stepped over is queued as a separate delim token.
 *
 * Returns the position at which tokenizing continues, or NULL on failure.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
                                                lxb_css_syntax_token_t *token,
                                                const lxb_char_t *data,
                                                const lxb_char_t *end)
{
    bool have_minus;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *esc, *minus;
    lxb_css_syntax_token_t *delim, *t_str;
    lxb_css_syntax_token_string_t *str;

    /* The unit string of the dimension is addressed as a token of its own. */
    str = lxb_css_syntax_token_dimension_string(token);
    t_str = (lxb_css_syntax_token_t *) (void *) str;

    lxb_css_syntax_token_base(t_str)->begin = data;

    ch = *data;

    if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
        || ch == 0x00)
    {
        token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

        return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
    }

    /* U+0025 PERCENTAGE SIGN (%) */
    if (ch == 0x25) {
        token->type = LXB_CSS_SYNTAX_TOKEN_PERCENTAGE;

        lxb_css_syntax_token_base(token)->end = ++data;

        return data;
    }

    have_minus = false;
    /* Remembered so a rejected '-' can be reported as its own delim. */
    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (ch == 0x2D) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ends after '-': report it as a delim. */
                delim = lxb_css_syntax_list_append_delim(tkz, data - 1,
                                                         data, '-');
                if (delim == NULL) {
                    return NULL;
                }

                return data;
            }
        }

        ch = *data;

        if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
            || ch == 0x2D || ch == 0x00)
        {
            token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

            /* The '-' is the first byte of the unit. */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                          (const lxb_char_t *) "-", 1);

            return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
        }

        have_minus = true;
    }

    /* Position of a possible backslash. */
    esc = data;

    /* U+005C REVERSE SOLIDUS (\) */
    if (ch == 0x5C) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_rev_solidus;
            }
        }

        ch = *data;

        /* A backslash followed by LF, FF or CR is not treated as an escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

            if (have_minus) {
                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              (const lxb_char_t *) "-", 1);
            }

            data = lxb_css_syntax_state_escaped(tkz, data, &end);
            if (data == NULL) {
                return NULL;
            }

            return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
        }

    delim_rev_solidus:

        /* Not an escape: queue the '\' and, if present, the '-' as delims. */
        delim = lxb_css_syntax_list_append_delim(tkz, esc, esc + 1, '\\');
        if (delim == NULL) {
            return NULL;
        }

        if (have_minus) {
            delim = lxb_css_syntax_list_append_delim(tkz, minus,
                                                     minus + 1, '-');
            if (delim == NULL) {
                return NULL;
            }
        }

        return data;
    }

    /* No unit follows: the numeric token ends where this state started. */
    lxb_css_syntax_token_base(token)->end = minus;

    if (have_minus) {
        delim = lxb_css_syntax_list_append_delim(tkz, minus, NULL, '-');
        if (delim == NULL) {
            return NULL;
        }
    }

    return data;
}
1962
+
1963
/*
 * Reads name bytes into the tokenizer's string buffer until a byte that
 * cannot be part of a name is met, or the input ends.
 *
 * Plain runs of name bytes are appended in bulk: [begin, data) is the run
 * that has been scanned but not yet copied. Three bytes with a zero entry in
 * lxb_css_syntax_res_name_map are treated specially:
 *   - '\' not followed by LF/FF/CR is decoded by
 *     lxb_css_syntax_state_escaped();
 *   - '\' followed by a newline or by the end of input ends the name, and
 *     the backslash is queued as a delim token;
 *   - NUL is replaced by lexbor_str_res_ansi_replacement_character.
 * Any other such byte ends the name.
 *
 * The collected string is attached to `token` through
 * lxb_css_syntax_state_string_set(), whose result is returned. NULL is
 * returned when a helper reports failure.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
                                   lxb_css_syntax_token_t *token,
                                   const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_status_t status;
    const lxb_char_t *begin, *last;
    lxb_css_syntax_token_t *delim;

    begin = data;

    for (;; data++) {
        if (data >= end) {
            /* Flush the pending run before the chunk is replaced. */
            if (begin < data) {
                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            /* Kept as the token end in case no further input arrives. */
            last = data;

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                lxb_css_syntax_token_base(token)->end = last;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }

            begin = data;
        }

        if (lxb_css_syntax_res_name_map[*data] == 0x00) {

            /* U+005C REVERSE SOLIDUS (\) */
            if (*data == 0x5C) {
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* `begin` marks the backslash, `last` the byte after it. */
                begin = data;
                last = ++data;

                if (data == end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        goto push_delim_last;
                    }
                }

                if (*data == 0x0A || *data == 0x0C || *data == 0x0D) {
                    goto push_delim_last;
                }

                data = lxb_css_syntax_state_escaped(tkz, data, &end);
                if (data == NULL) {
                    return NULL;
                }

                /*
                 * The next run starts right after the escape; step back one
                 * byte because the loop header advances `data` again.
                 */
                begin = data--;
            }
            else if (*data == 0x00) {
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              lexbor_str_res_ansi_replacement_character,
                                              sizeof(lexbor_str_res_ansi_replacement_character) - 1);
                /* Skip the NUL itself when the next run is copied. */
                begin = data + 1;
            }
            else {
                /* First byte that does not belong to the name. */
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->end = data;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }
        }
    }

    /* Not reached: the loop above only leaves through return or goto. */
    return data;

push_delim_last:

    /* The name ends in front of the backslash, which becomes a delim. */
    lxb_css_syntax_token_base(token)->end = begin;

    delim = lxb_css_syntax_list_append_delim(tkz, begin, last, '\\');
    if (delim == NULL) {
        return NULL;
    }

    return lxb_css_syntax_state_string_set(tkz, token, data);
}
2056
+
2057
+ const lxb_char_t *
2058
+ lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t *tkz,
2059
+ lxb_css_syntax_token_t *token,
2060
+ const lxb_char_t *data, const lxb_char_t *end)
2061
+ {
2062
+ lxb_css_syntax_token_base(token)->begin = data;
2063
+
2064
+ return lxb_css_syntax_state_ident_like(tkz, token, data, end);
2065
+ }
2066
+
2067
+ const lxb_char_t *
2068
+ lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz,
2069
+ lxb_css_syntax_token_t *token,
2070
+ const lxb_char_t *data, const lxb_char_t *end)
2071
+ {
2072
+ lxb_char_t ch;
2073
+ lxb_status_t status;
2074
+ const lxb_char_t *begin, *ws_begin;
2075
+ lxb_css_syntax_token_t *ws;
2076
+ lxb_css_syntax_token_string_t *str, *ws_str;
2077
+ static const lxb_char_t url[] = "url";
2078
+
2079
+ data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2080
+
2081
+ if (data >= end) {
2082
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2083
+ if (data >= end) {
2084
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2085
+ return data;
2086
+ }
2087
+ }
2088
+
2089
+ if (data < end && *data == '(') {
2090
+ lxb_css_syntax_token_base(token)->end = ++data;
2091
+
2092
+ str = lxb_css_syntax_token_string(token);
2093
+
2094
+ if (str->length == 3 && lexbor_str_data_casecmp(str->data, url)) {
2095
+ begin = data;
2096
+
2097
+ tkz->pos += str->length + 1;
2098
+ ws_begin = tkz->pos;
2099
+
2100
+ do {
2101
+ if (data >= end) {
2102
+ if (begin < data) {
2103
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2104
+ }
2105
+
2106
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2107
+ if (data >= end) {
2108
+ begin = data;
2109
+ goto with_ws;
2110
+ }
2111
+
2112
+ begin = data;
2113
+ }
2114
+
2115
+ ch = *data;
2116
+
2117
+ if (lexbor_utils_whitespace(ch, !=, &&)) {
2118
+ /* U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (') */
2119
+ if (ch == 0x22 || ch == 0x27) {
2120
+ goto with_ws;
2121
+ }
2122
+
2123
+ tkz->pos = tkz->start;
2124
+
2125
+ return lxb_css_syntax_state_url(tkz, token, data, end);
2126
+ }
2127
+
2128
+ data++;
2129
+ }
2130
+ while (true);
2131
+ }
2132
+
2133
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2134
+
2135
+ return data;
2136
+ }
2137
+
2138
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2139
+
2140
+ return data;
2141
+
2142
+ with_ws:
2143
+
2144
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2145
+
2146
+ if (ws_begin != tkz->pos || begin < data) {
2147
+ if (begin < data) {
2148
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2149
+ }
2150
+
2151
+ if (tkz->pos >= tkz->end) {
2152
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2153
+ return NULL;
2154
+ }
2155
+ }
2156
+
2157
+ str->data = tkz->start;
2158
+ *tkz->pos = 0x00;
2159
+
2160
+ ws = lxb_css_syntax_tokenizer_token_append(tkz);
2161
+ if (ws == NULL) {
2162
+ return NULL;
2163
+ }
2164
+
2165
+ ws->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
2166
+
2167
+ lxb_css_syntax_token_base(ws)->begin = begin;
2168
+ lxb_css_syntax_token_base(ws)->end = data;
2169
+
2170
+ ws_str = lxb_css_syntax_token_string(ws);
2171
+
2172
+ ws_str->data = tkz->start + str->length + 1;
2173
+ ws_str->length = tkz->pos - ws_str->data;
2174
+ }
2175
+
2176
+ tkz->pos = tkz->start;
2177
+
2178
+ return data;
2179
+ }
2180
+
2181
+ const lxb_char_t *
2182
+ lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t *tkz,
2183
+ lxb_css_syntax_token_t *token,
2184
+ const lxb_char_t *data, const lxb_char_t *end)
2185
+ {
2186
+ lxb_css_syntax_token_base(token)->begin = data;
2187
+
2188
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
2189
+ }
2190
+
2191
+ const lxb_char_t *
2192
+ lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz,
2193
+ lxb_css_syntax_token_t *token,
2194
+ const lxb_char_t *data, const lxb_char_t *end)
2195
+ {
2196
+ data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2197
+ if (data == NULL) {
2198
+ return NULL;
2199
+ }
2200
+
2201
+ if (data < end && *data == '(') {
2202
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2203
+
2204
+ lxb_css_syntax_token_base(token)->end = ++data;
2205
+
2206
+ return data;
2207
+ }
2208
+
2209
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2210
+
2211
+ return data;
2212
+ }
2213
+
2214
/*
 * URL
 *
 * Reads the body of an unquoted url(...) into the tokenizer's string buffer.
 * [begin, data) is the run of bytes scanned but not yet copied.
 *
 *   - ')' ends the token as a URL;
 *   - end of input ends the token as a URL and records an EOINUR error;
 *   - NUL is replaced by lexbor_str_res_ansi_replacement_character;
 *   - '"', '\'', '(', U+000B, U+007F and the control bytes up to U+0008 and
 *     from U+000E to U+001F record a QOINUR error and continue in
 *     lxb_css_syntax_state_bad_url();
 *   - '\' not followed by LF/FF/CR is decoded by
 *     lxb_css_syntax_state_escaped(); followed by a newline it records
 *     a WRESINUR error and continues as a bad URL; at the end of input it
 *     records a WRESINUR error and ends the token as a bad URL;
 *   - whitespace must be followed by more whitespace and then ')'. End of
 *     input there records EOINUR and ends the token as a bad URL; any other
 *     byte continues as a bad URL.
 *
 * Returns the position at which tokenizing continues, or NULL on failure.
 */
static const lxb_char_t *
lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
                         const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *begin;

    status = LXB_STATUS_OK;

    /* Start from an empty, NUL-terminated string at the write position. */
    *tkz->pos = 0x00;

    begin = data;

    do {
        if (data >= end) {
            /* Flush the pending run before the chunk is replaced. */
            if (begin < data) {
                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                   LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);

                token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                lxb_css_syntax_token_base(token)->end = data;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }

            begin = data;
        }

        switch (*data) {
            /* U+0000 NULL (\0) */
            case 0x00:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              lexbor_str_res_ansi_replacement_character,
                                              sizeof(lexbor_str_res_ansi_replacement_character) - 1);
                /* Skip the NUL itself when the next run is copied. */
                begin = data + 1;
                break;

            /* U+0029 RIGHT PARENTHESIS ()) */
            case 0x29:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                lxb_css_syntax_token_base(token)->end = ++data;

                return lxb_css_syntax_state_string_set(tkz, token, data);

            /*
             * U+0022 QUOTATION MARK (")
             * U+0027 APOSTROPHE (')
             * U+0028 LEFT PARENTHESIS (()
             * U+000B LINE TABULATION
             * U+007F DELETE
             */
            case 0x22:
            case 0x27:
            case 0x28:
            case 0x0B:
            case 0x7F:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                   LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);

                /* Resume after the offending byte. */
                return lxb_css_syntax_state_bad_url(tkz, token, data + 1, end);

            /* U+005C REVERSE SOLIDUS (\) */
            case 0x5C:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* Both now point at the byte after the backslash. */
                begin = ++data;

                if (data == end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                           LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);

                        token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;

                        lxb_css_syntax_token_base(token)->end = begin;

                        return lxb_css_syntax_state_string_set(tkz, token, data);
                    }
                }

                ch = *data;

                /* A backslash in front of a newline is not an escape. */
                if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
                    lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                       LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);

                    lxb_css_syntax_token_base(token)->end = data;

                    return lxb_css_syntax_state_bad_url(tkz, token, data, end);
                }

                data = lxb_css_syntax_state_escaped(tkz, data, &end);
                if (data == NULL) {
                    return NULL;
                }

                /*
                 * The next run starts right after the escape; step back one
                 * byte because `data` is advanced again at the loop bottom.
                 */
                begin = data--;

                break;

            /*
             * U+0009 CHARACTER TABULATION (tab)
             * U+000A LINE FEED (LF)
             * U+000C FORM FEED (FF)
             * U+000D CARRIAGE RETURN (CR)
             * U+0020 SPACE
             */
            case 0x09:
            case 0x0A:
            case 0x0C:
            case 0x0D:
            case 0x20:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->end = data;

                begin = ++data;

                /* Skip trailing whitespace; only ')' may follow it. */
                do {
                    if (data == end) {
                        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                        if (data >= end) {
                            lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                               LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);

                            token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;

                            lxb_css_syntax_token_base(token)->end = begin;

                            return lxb_css_syntax_state_string_set(tkz, token, data);
                        }
                    }

                    ch = *data;

                    if (lexbor_utils_whitespace(ch, !=, &&)) {
                        /* U+0029 RIGHT PARENTHESIS ()) */
                        if (*data == 0x29) {
                            token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                            lxb_css_syntax_token_base(token)->end = ++data;

                            return lxb_css_syntax_state_string_set(tkz, token, data);
                        }

                        return lxb_css_syntax_state_bad_url(tkz, token,
                                                            data, end);
                    }

                    data++;
                }
                while (true);

            default:
                /*
                 * Inclusive:
                 * U+0000 NULL and U+0008 BACKSPACE or
                 * U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE
                 */
                if ((*data >= 0x00 && *data <= 0x08)
                    || (*data >= 0x0E && *data <= 0x1F))
                {
                    lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                       LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);

                    return lxb_css_syntax_state_bad_url(tkz, token,
                                                        data + 1, end);
                }

                /* Ordinary URL byte: it stays in the pending run. */
                break;
        }

        data++;
    }
    while (true);

    /* Not reached: the loop above only leaves through return. */
    return data;
}
2420
+
2421
+ /*
2422
+ * Bad URL
2423
+ */
2424
+ static const lxb_char_t *
2425
+ lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2426
+ const lxb_char_t *data, const lxb_char_t *end)
2427
+ {
2428
+ lxb_status_t status;
2429
+
2430
+ token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2431
+
2432
+ if(lxb_css_syntax_state_string_set(tkz, token, data) == NULL) {
2433
+ return NULL;
2434
+ }
2435
+
2436
+ do {
2437
+ if (data >= end) {
2438
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2439
+ if (data >= end) {
2440
+ lxb_css_syntax_token_base(token)->end = data;
2441
+ return data;
2442
+ }
2443
+ }
2444
+
2445
+ /* U+0029 RIGHT PARENTHESIS ()) */
2446
+ if (*data == 0x29) {
2447
+ lxb_css_syntax_token_base(token)->end = ++data;
2448
+ return data;
2449
+ }
2450
+ /* U+005C REVERSE SOLIDUS (\) */
2451
+ else if (*data == 0x5C) {
2452
+ data++;
2453
+ }
2454
+
2455
+ data++;
2456
+ }
2457
+ while (true);
2458
+
2459
+ return data;
2460
+ }
2461
+
2462
+ lxb_inline lxb_status_t
2463
+ lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t *tkz)
2464
+ {
2465
+ return lxb_css_syntax_string_append(tkz, lexbor_str_res_ansi_replacement_character,
2466
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2467
+ }
2468
+
2469
+ static const lxb_char_t *
2470
+ lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
2471
+ const lxb_char_t *data, const lxb_char_t **end)
2472
+ {
2473
+ uint32_t cp;
2474
+ unsigned count;
2475
+ lxb_status_t status;
2476
+
2477
+ cp = 0;
2478
+
2479
+ for (count = 0; count < 6; count++, data++) {
2480
+ if (data >= *end) {
2481
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2482
+ if (status != LXB_STATUS_OK) {
2483
+ return NULL;
2484
+ }
2485
+
2486
+ if (data >= *end) {
2487
+ if (count == 0) {
2488
+ return *end;
2489
+ }
2490
+
2491
+ break;
2492
+ }
2493
+ }
2494
+
2495
+ if (lexbor_str_res_map_hex[*data] == 0xFF) {
2496
+ if (count == 0) {
2497
+ if (*data == 0x00) {
2498
+ status = lxb_css_syntax_string_append_rep(tkz);
2499
+ if (status != LXB_STATUS_OK) {
2500
+ return NULL;
2501
+ }
2502
+
2503
+ return data + 1;
2504
+ }
2505
+
2506
+ status = lxb_css_syntax_string_append(tkz, data, 1);
2507
+ if (status != LXB_STATUS_OK) {
2508
+ return NULL;
2509
+ }
2510
+
2511
+ return data + 1;
2512
+ }
2513
+
2514
+ switch (*data) {
2515
+ case 0x0D:
2516
+ data++;
2517
+
2518
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data,
2519
+ end);
2520
+ if (status != LXB_STATUS_OK) {
2521
+ return NULL;
2522
+ }
2523
+
2524
+ if (data >= *end) {
2525
+ break;
2526
+ }
2527
+
2528
+ if (*data == 0x0A) {
2529
+ data++;
2530
+ }
2531
+
2532
+ break;
2533
+
2534
+ case 0x09:
2535
+ case 0x20:
2536
+ case 0x0A:
2537
+ case 0x0C:
2538
+ data++;
2539
+ break;
2540
+ }
2541
+
2542
+ break;
2543
+ }
2544
+
2545
+ cp <<= 4;
2546
+ cp |= lexbor_str_res_map_hex[*data];
2547
+ }
2548
+
2549
+ if ((tkz->end - tkz->pos) < 5) {
2550
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2551
+ return NULL;
2552
+ }
2553
+ }
2554
+
2555
+ lxb_css_syntax_codepoint_to_ascii(tkz, cp);
2556
+
2557
+ return data;
2558
+ }
2559
+
2560
+ static const lxb_char_t *
2561
+ lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
2562
+ const lxb_char_t *data, const lxb_char_t **end)
2563
+ {
2564
+ lxb_status_t status;
2565
+
2566
+ /* U+000D CARRIAGE RETURN */
2567
+ if (*data == 0x0D) {
2568
+ data++;
2569
+
2570
+ if (data >= *end) {
2571
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2572
+ if (status != LXB_STATUS_OK) {
2573
+ return NULL;
2574
+ }
2575
+
2576
+ if (data >= *end) {
2577
+ return data;
2578
+ }
2579
+ }
2580
+
2581
+ /* U+000A LINE FEED */
2582
+ if (*data == 0x0A) {
2583
+ data++;
2584
+ }
2585
+
2586
+ return data;
2587
+ }
2588
+
2589
+ if (*data == 0x00) {
2590
+ status = lxb_css_syntax_string_append_rep(tkz);
2591
+ if (status != LXB_STATUS_OK) {
2592
+ return NULL;
2593
+ }
2594
+
2595
+ return data + 1;
2596
+ }
2597
+
2598
+ if (*data == 0x0A || *data == 0x0C) {
2599
+ return data + 1;
2600
+ }
2601
+
2602
+ return lxb_css_syntax_state_escaped(tkz, data, end);
2603
+ }