nokolexbor 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (486) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nokolexbor/config.h +186 -0
  3. data/ext/nokolexbor/extconf.rb +131 -0
  4. data/ext/nokolexbor/libxml/HTMLparser.h +320 -0
  5. data/ext/nokolexbor/libxml/SAX2.h +173 -0
  6. data/ext/nokolexbor/libxml/chvalid.h +230 -0
  7. data/ext/nokolexbor/libxml/debugXML.h +217 -0
  8. data/ext/nokolexbor/libxml/dict.h +81 -0
  9. data/ext/nokolexbor/libxml/encoding.h +232 -0
  10. data/ext/nokolexbor/libxml/entities.h +153 -0
  11. data/ext/nokolexbor/libxml/globals.h +529 -0
  12. data/ext/nokolexbor/libxml/hash.h +236 -0
  13. data/ext/nokolexbor/libxml/list.h +137 -0
  14. data/ext/nokolexbor/libxml/parser.h +1264 -0
  15. data/ext/nokolexbor/libxml/parserInternals.h +641 -0
  16. data/ext/nokolexbor/libxml/pattern.h +100 -0
  17. data/ext/nokolexbor/libxml/threads.h +94 -0
  18. data/ext/nokolexbor/libxml/tree.h +1315 -0
  19. data/ext/nokolexbor/libxml/uri.h +94 -0
  20. data/ext/nokolexbor/libxml/valid.h +448 -0
  21. data/ext/nokolexbor/libxml/xmlIO.h +369 -0
  22. data/ext/nokolexbor/libxml/xmlautomata.h +146 -0
  23. data/ext/nokolexbor/libxml/xmlerror.h +919 -0
  24. data/ext/nokolexbor/libxml/xmlexports.h +79 -0
  25. data/ext/nokolexbor/libxml/xmlmemory.h +226 -0
  26. data/ext/nokolexbor/libxml/xmlregexp.h +222 -0
  27. data/ext/nokolexbor/libxml/xmlstring.h +140 -0
  28. data/ext/nokolexbor/libxml/xmlversion.h +526 -0
  29. data/ext/nokolexbor/libxml/xpath.h +575 -0
  30. data/ext/nokolexbor/libxml/xpathInternals.h +632 -0
  31. data/ext/nokolexbor/libxml/xpointer.h +137 -0
  32. data/ext/nokolexbor/libxml.h +76 -0
  33. data/ext/nokolexbor/memory.c +39 -0
  34. data/ext/nokolexbor/nl_document.c +51 -0
  35. data/ext/nokolexbor/nl_node.c +790 -0
  36. data/ext/nokolexbor/nl_node_set.c +368 -0
  37. data/ext/nokolexbor/nl_xpath_context.c +200 -0
  38. data/ext/nokolexbor/nokolexbor.c +63 -0
  39. data/ext/nokolexbor/nokolexbor.h +37 -0
  40. data/ext/nokolexbor/private/buf.h +70 -0
  41. data/ext/nokolexbor/private/dict.h +11 -0
  42. data/ext/nokolexbor/private/enc.h +17 -0
  43. data/ext/nokolexbor/private/error.h +21 -0
  44. data/ext/nokolexbor/private/globals.h +9 -0
  45. data/ext/nokolexbor/private/memory.h +9 -0
  46. data/ext/nokolexbor/private/parser.h +27 -0
  47. data/ext/nokolexbor/private/string.h +9 -0
  48. data/ext/nokolexbor/private/threads.h +50 -0
  49. data/ext/nokolexbor/private/tree.h +18 -0
  50. data/ext/nokolexbor/private/xpath.h +7 -0
  51. data/ext/nokolexbor/timsort.h +601 -0
  52. data/ext/nokolexbor/xml_SAX2.c +80 -0
  53. data/ext/nokolexbor/xml_buf.c +363 -0
  54. data/ext/nokolexbor/xml_chvalid.c +334 -0
  55. data/ext/nokolexbor/xml_dict.c +1264 -0
  56. data/ext/nokolexbor/xml_encoding.c +124 -0
  57. data/ext/nokolexbor/xml_error.c +134 -0
  58. data/ext/nokolexbor/xml_globals.c +1085 -0
  59. data/ext/nokolexbor/xml_hash.c +1141 -0
  60. data/ext/nokolexbor/xml_memory.c +203 -0
  61. data/ext/nokolexbor/xml_parser.c +127 -0
  62. data/ext/nokolexbor/xml_parserInternals.c +338 -0
  63. data/ext/nokolexbor/xml_pattern.c +2375 -0
  64. data/ext/nokolexbor/xml_string.c +1051 -0
  65. data/ext/nokolexbor/xml_threads.c +881 -0
  66. data/ext/nokolexbor/xml_tree.c +148 -0
  67. data/ext/nokolexbor/xml_xpath.c +14743 -0
  68. data/lib/nokolexbor/attribute.rb +18 -0
  69. data/lib/nokolexbor/document.rb +6 -0
  70. data/lib/nokolexbor/node.rb +264 -0
  71. data/lib/nokolexbor/node_set.rb +124 -0
  72. data/lib/nokolexbor/version.rb +5 -0
  73. data/lib/nokolexbor/xpath_context.rb +14 -0
  74. data/lib/nokolexbor.rb +17 -0
  75. data/patches/0001-lexbor-support-text-pseudo-element.patch +137 -0
  76. data/patches/0002-lexbor-match-id-class-case-sensitive.patch +22 -0
  77. data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
  78. data/vendor/lexbor/CMakeLists.txt +331 -0
  79. data/vendor/lexbor/config.cmake +890 -0
  80. data/vendor/lexbor/feature.cmake +134 -0
  81. data/vendor/lexbor/source/lexbor/core/array.c +208 -0
  82. data/vendor/lexbor/source/lexbor/core/array.h +100 -0
  83. data/vendor/lexbor/source/lexbor/core/array_obj.c +216 -0
  84. data/vendor/lexbor/source/lexbor/core/array_obj.h +134 -0
  85. data/vendor/lexbor/source/lexbor/core/avl.c +442 -0
  86. data/vendor/lexbor/source/lexbor/core/avl.h +82 -0
  87. data/vendor/lexbor/source/lexbor/core/base.h +86 -0
  88. data/vendor/lexbor/source/lexbor/core/bst.c +468 -0
  89. data/vendor/lexbor/source/lexbor/core/bst.h +108 -0
  90. data/vendor/lexbor/source/lexbor/core/bst_map.c +238 -0
  91. data/vendor/lexbor/source/lexbor/core/bst_map.h +87 -0
  92. data/vendor/lexbor/source/lexbor/core/config.cmake +12 -0
  93. data/vendor/lexbor/source/lexbor/core/conv.c +203 -0
  94. data/vendor/lexbor/source/lexbor/core/conv.h +53 -0
  95. data/vendor/lexbor/source/lexbor/core/core.h +35 -0
  96. data/vendor/lexbor/source/lexbor/core/def.h +57 -0
  97. data/vendor/lexbor/source/lexbor/core/diyfp.c +153 -0
  98. data/vendor/lexbor/source/lexbor/core/diyfp.h +258 -0
  99. data/vendor/lexbor/source/lexbor/core/dobject.c +187 -0
  100. data/vendor/lexbor/source/lexbor/core/dobject.h +92 -0
  101. data/vendor/lexbor/source/lexbor/core/dtoa.c +404 -0
  102. data/vendor/lexbor/source/lexbor/core/dtoa.h +28 -0
  103. data/vendor/lexbor/source/lexbor/core/fs.h +60 -0
  104. data/vendor/lexbor/source/lexbor/core/hash.c +476 -0
  105. data/vendor/lexbor/source/lexbor/core/hash.h +218 -0
  106. data/vendor/lexbor/source/lexbor/core/in.c +267 -0
  107. data/vendor/lexbor/source/lexbor/core/in.h +172 -0
  108. data/vendor/lexbor/source/lexbor/core/lexbor.h +35 -0
  109. data/vendor/lexbor/source/lexbor/core/mem.c +228 -0
  110. data/vendor/lexbor/source/lexbor/core/mem.h +141 -0
  111. data/vendor/lexbor/source/lexbor/core/mraw.c +428 -0
  112. data/vendor/lexbor/source/lexbor/core/mraw.h +114 -0
  113. data/vendor/lexbor/source/lexbor/core/perf.h +45 -0
  114. data/vendor/lexbor/source/lexbor/core/plog.c +73 -0
  115. data/vendor/lexbor/source/lexbor/core/plog.h +102 -0
  116. data/vendor/lexbor/source/lexbor/core/print.c +168 -0
  117. data/vendor/lexbor/source/lexbor/core/print.h +39 -0
  118. data/vendor/lexbor/source/lexbor/core/sbst.h +59 -0
  119. data/vendor/lexbor/source/lexbor/core/serialize.c +27 -0
  120. data/vendor/lexbor/source/lexbor/core/serialize.h +32 -0
  121. data/vendor/lexbor/source/lexbor/core/shs.c +118 -0
  122. data/vendor/lexbor/source/lexbor/core/shs.h +82 -0
  123. data/vendor/lexbor/source/lexbor/core/str.c +617 -0
  124. data/vendor/lexbor/source/lexbor/core/str.h +247 -0
  125. data/vendor/lexbor/source/lexbor/core/str_res.h +369 -0
  126. data/vendor/lexbor/source/lexbor/core/strtod.c +326 -0
  127. data/vendor/lexbor/source/lexbor/core/strtod.h +28 -0
  128. data/vendor/lexbor/source/lexbor/core/types.h +39 -0
  129. data/vendor/lexbor/source/lexbor/core/utils.c +43 -0
  130. data/vendor/lexbor/source/lexbor/core/utils.h +36 -0
  131. data/vendor/lexbor/source/lexbor/css/base.h +44 -0
  132. data/vendor/lexbor/source/lexbor/css/config.cmake +2 -0
  133. data/vendor/lexbor/source/lexbor/css/css.h +25 -0
  134. data/vendor/lexbor/source/lexbor/css/log.c +336 -0
  135. data/vendor/lexbor/source/lexbor/css/log.h +103 -0
  136. data/vendor/lexbor/source/lexbor/css/node.h +29 -0
  137. data/vendor/lexbor/source/lexbor/css/parser.c +473 -0
  138. data/vendor/lexbor/source/lexbor/css/parser.h +368 -0
  139. data/vendor/lexbor/source/lexbor/css/selectors/base.h +48 -0
  140. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.c +91 -0
  141. data/vendor/lexbor/source/lexbor/css/selectors/pseudo.h +66 -0
  142. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_const.h +109 -0
  143. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_res.h +302 -0
  144. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +279 -0
  145. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.h +85 -0
  146. data/vendor/lexbor/source/lexbor/css/selectors/selector.c +927 -0
  147. data/vendor/lexbor/source/lexbor/css/selectors/selector.h +200 -0
  148. data/vendor/lexbor/source/lexbor/css/selectors/selectors.c +340 -0
  149. data/vendor/lexbor/source/lexbor/css/selectors/selectors.h +137 -0
  150. data/vendor/lexbor/source/lexbor/css/selectors/state.c +1718 -0
  151. data/vendor/lexbor/source/lexbor/css/selectors/state.h +79 -0
  152. data/vendor/lexbor/source/lexbor/css/stylesheet.h +37 -0
  153. data/vendor/lexbor/source/lexbor/css/syntax/anb.c +443 -0
  154. data/vendor/lexbor/source/lexbor/css/syntax/anb.h +45 -0
  155. data/vendor/lexbor/source/lexbor/css/syntax/base.h +33 -0
  156. data/vendor/lexbor/source/lexbor/css/syntax/parser.c +9 -0
  157. data/vendor/lexbor/source/lexbor/css/syntax/parser.h +25 -0
  158. data/vendor/lexbor/source/lexbor/css/syntax/res.h +48 -0
  159. data/vendor/lexbor/source/lexbor/css/syntax/state.c +2603 -0
  160. data/vendor/lexbor/source/lexbor/css/syntax/state.h +140 -0
  161. data/vendor/lexbor/source/lexbor/css/syntax/state_res.h +273 -0
  162. data/vendor/lexbor/source/lexbor/css/syntax/syntax.c +67 -0
  163. data/vendor/lexbor/source/lexbor/css/syntax/token.c +618 -0
  164. data/vendor/lexbor/source/lexbor/css/syntax/token.h +298 -0
  165. data/vendor/lexbor/source/lexbor/css/syntax/token_res.h +68 -0
  166. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.c +30 -0
  167. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer/error.h +58 -0
  168. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.c +278 -0
  169. data/vendor/lexbor/source/lexbor/css/syntax/tokenizer.h +121 -0
  170. data/vendor/lexbor/source/lexbor/dom/base.h +32 -0
  171. data/vendor/lexbor/source/lexbor/dom/collection.c +97 -0
  172. data/vendor/lexbor/source/lexbor/dom/collection.h +112 -0
  173. data/vendor/lexbor/source/lexbor/dom/config.cmake +3 -0
  174. data/vendor/lexbor/source/lexbor/dom/dom.h +29 -0
  175. data/vendor/lexbor/source/lexbor/dom/exception.c +18 -0
  176. data/vendor/lexbor/source/lexbor/dom/exception.h +73 -0
  177. data/vendor/lexbor/source/lexbor/dom/interface.c +110 -0
  178. data/vendor/lexbor/source/lexbor/dom/interface.h +88 -0
  179. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.c +445 -0
  180. data/vendor/lexbor/source/lexbor/dom/interfaces/attr.h +152 -0
  181. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_const.h +62 -0
  182. data/vendor/lexbor/source/lexbor/dom/interfaces/attr_res.h +143 -0
  183. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.c +55 -0
  184. data/vendor/lexbor/source/lexbor/dom/interfaces/cdata_section.h +38 -0
  185. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.c +110 -0
  186. data/vendor/lexbor/source/lexbor/dom/interfaces/character_data.h +51 -0
  187. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.c +64 -0
  188. data/vendor/lexbor/source/lexbor/dom/interfaces/comment.h +42 -0
  189. data/vendor/lexbor/source/lexbor/dom/interfaces/document.c +536 -0
  190. data/vendor/lexbor/source/lexbor/dom/interfaces/document.h +243 -0
  191. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.c +36 -0
  192. data/vendor/lexbor/source/lexbor/dom/interfaces/document_fragment.h +36 -0
  193. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.c +125 -0
  194. data/vendor/lexbor/source/lexbor/dom/interfaces/document_type.h +108 -0
  195. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +1411 -0
  196. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +319 -0
  197. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.c +32 -0
  198. data/vendor/lexbor/source/lexbor/dom/interfaces/event_target.h +34 -0
  199. data/vendor/lexbor/source/lexbor/dom/interfaces/node.c +661 -0
  200. data/vendor/lexbor/source/lexbor/dom/interfaces/node.h +192 -0
  201. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.c +87 -0
  202. data/vendor/lexbor/source/lexbor/dom/interfaces/processing_instruction.h +66 -0
  203. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.c +36 -0
  204. data/vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h +44 -0
  205. data/vendor/lexbor/source/lexbor/dom/interfaces/text.c +63 -0
  206. data/vendor/lexbor/source/lexbor/dom/interfaces/text.h +42 -0
  207. data/vendor/lexbor/source/lexbor/encoding/base.h +218 -0
  208. data/vendor/lexbor/source/lexbor/encoding/big5.c +42839 -0
  209. data/vendor/lexbor/source/lexbor/encoding/config.cmake +12 -0
  210. data/vendor/lexbor/source/lexbor/encoding/const.h +65 -0
  211. data/vendor/lexbor/source/lexbor/encoding/decode.c +3193 -0
  212. data/vendor/lexbor/source/lexbor/encoding/decode.h +370 -0
  213. data/vendor/lexbor/source/lexbor/encoding/encode.c +1931 -0
  214. data/vendor/lexbor/source/lexbor/encoding/encode.h +377 -0
  215. data/vendor/lexbor/source/lexbor/encoding/encoding.c +252 -0
  216. data/vendor/lexbor/source/lexbor/encoding/encoding.h +475 -0
  217. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +53883 -0
  218. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +47905 -0
  219. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +159 -0
  220. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +22477 -0
  221. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +15787 -0
  222. data/vendor/lexbor/source/lexbor/encoding/multi.h +53 -0
  223. data/vendor/lexbor/source/lexbor/encoding/range.c +71 -0
  224. data/vendor/lexbor/source/lexbor/encoding/range.h +34 -0
  225. data/vendor/lexbor/source/lexbor/encoding/res.c +222 -0
  226. data/vendor/lexbor/source/lexbor/encoding/res.h +34 -0
  227. data/vendor/lexbor/source/lexbor/encoding/single.c +13748 -0
  228. data/vendor/lexbor/source/lexbor/encoding/single.h +116 -0
  229. data/vendor/lexbor/source/lexbor/html/base.h +44 -0
  230. data/vendor/lexbor/source/lexbor/html/config.cmake +3 -0
  231. data/vendor/lexbor/source/lexbor/html/encoding.c +574 -0
  232. data/vendor/lexbor/source/lexbor/html/encoding.h +106 -0
  233. data/vendor/lexbor/source/lexbor/html/html.h +107 -0
  234. data/vendor/lexbor/source/lexbor/html/interface.c +165 -0
  235. data/vendor/lexbor/source/lexbor/html/interface.h +186 -0
  236. data/vendor/lexbor/source/lexbor/html/interface_res.h +4449 -0
  237. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.c +36 -0
  238. data/vendor/lexbor/source/lexbor/html/interfaces/anchor_element.h +34 -0
  239. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.c +36 -0
  240. data/vendor/lexbor/source/lexbor/html/interfaces/area_element.h +34 -0
  241. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.c +36 -0
  242. data/vendor/lexbor/source/lexbor/html/interfaces/audio_element.h +34 -0
  243. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.c +36 -0
  244. data/vendor/lexbor/source/lexbor/html/interfaces/base_element.h +34 -0
  245. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.c +36 -0
  246. data/vendor/lexbor/source/lexbor/html/interfaces/body_element.h +34 -0
  247. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.c +36 -0
  248. data/vendor/lexbor/source/lexbor/html/interfaces/br_element.h +34 -0
  249. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.c +36 -0
  250. data/vendor/lexbor/source/lexbor/html/interfaces/button_element.h +34 -0
  251. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.c +36 -0
  252. data/vendor/lexbor/source/lexbor/html/interfaces/canvas_element.h +34 -0
  253. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.c +36 -0
  254. data/vendor/lexbor/source/lexbor/html/interfaces/d_list_element.h +34 -0
  255. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.c +36 -0
  256. data/vendor/lexbor/source/lexbor/html/interfaces/data_element.h +34 -0
  257. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.c +36 -0
  258. data/vendor/lexbor/source/lexbor/html/interfaces/data_list_element.h +34 -0
  259. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.c +36 -0
  260. data/vendor/lexbor/source/lexbor/html/interfaces/details_element.h +34 -0
  261. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.c +36 -0
  262. data/vendor/lexbor/source/lexbor/html/interfaces/dialog_element.h +34 -0
  263. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.c +36 -0
  264. data/vendor/lexbor/source/lexbor/html/interfaces/directory_element.h +34 -0
  265. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.c +36 -0
  266. data/vendor/lexbor/source/lexbor/html/interfaces/div_element.h +34 -0
  267. data/vendor/lexbor/source/lexbor/html/interfaces/document.c +444 -0
  268. data/vendor/lexbor/source/lexbor/html/interfaces/document.h +256 -0
  269. data/vendor/lexbor/source/lexbor/html/interfaces/element.c +64 -0
  270. data/vendor/lexbor/source/lexbor/html/interfaces/element.h +54 -0
  271. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.c +36 -0
  272. data/vendor/lexbor/source/lexbor/html/interfaces/embed_element.h +34 -0
  273. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.c +36 -0
  274. data/vendor/lexbor/source/lexbor/html/interfaces/field_set_element.h +34 -0
  275. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.c +36 -0
  276. data/vendor/lexbor/source/lexbor/html/interfaces/font_element.h +34 -0
  277. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.c +36 -0
  278. data/vendor/lexbor/source/lexbor/html/interfaces/form_element.h +34 -0
  279. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.c +36 -0
  280. data/vendor/lexbor/source/lexbor/html/interfaces/frame_element.h +34 -0
  281. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.c +36 -0
  282. data/vendor/lexbor/source/lexbor/html/interfaces/frame_set_element.h +34 -0
  283. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.c +36 -0
  284. data/vendor/lexbor/source/lexbor/html/interfaces/head_element.h +34 -0
  285. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.c +36 -0
  286. data/vendor/lexbor/source/lexbor/html/interfaces/heading_element.h +34 -0
  287. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.c +36 -0
  288. data/vendor/lexbor/source/lexbor/html/interfaces/hr_element.h +34 -0
  289. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.c +36 -0
  290. data/vendor/lexbor/source/lexbor/html/interfaces/html_element.h +34 -0
  291. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.c +36 -0
  292. data/vendor/lexbor/source/lexbor/html/interfaces/iframe_element.h +34 -0
  293. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.c +36 -0
  294. data/vendor/lexbor/source/lexbor/html/interfaces/image_element.h +34 -0
  295. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.c +36 -0
  296. data/vendor/lexbor/source/lexbor/html/interfaces/input_element.h +34 -0
  297. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.c +36 -0
  298. data/vendor/lexbor/source/lexbor/html/interfaces/label_element.h +34 -0
  299. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.c +36 -0
  300. data/vendor/lexbor/source/lexbor/html/interfaces/legend_element.h +34 -0
  301. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.c +36 -0
  302. data/vendor/lexbor/source/lexbor/html/interfaces/li_element.h +34 -0
  303. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.c +36 -0
  304. data/vendor/lexbor/source/lexbor/html/interfaces/link_element.h +34 -0
  305. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.c +36 -0
  306. data/vendor/lexbor/source/lexbor/html/interfaces/map_element.h +34 -0
  307. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.c +36 -0
  308. data/vendor/lexbor/source/lexbor/html/interfaces/marquee_element.h +34 -0
  309. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.c +36 -0
  310. data/vendor/lexbor/source/lexbor/html/interfaces/media_element.h +34 -0
  311. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.c +36 -0
  312. data/vendor/lexbor/source/lexbor/html/interfaces/menu_element.h +34 -0
  313. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.c +36 -0
  314. data/vendor/lexbor/source/lexbor/html/interfaces/meta_element.h +34 -0
  315. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.c +36 -0
  316. data/vendor/lexbor/source/lexbor/html/interfaces/meter_element.h +34 -0
  317. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.c +36 -0
  318. data/vendor/lexbor/source/lexbor/html/interfaces/mod_element.h +34 -0
  319. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.c +36 -0
  320. data/vendor/lexbor/source/lexbor/html/interfaces/o_list_element.h +34 -0
  321. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.c +36 -0
  322. data/vendor/lexbor/source/lexbor/html/interfaces/object_element.h +34 -0
  323. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.c +36 -0
  324. data/vendor/lexbor/source/lexbor/html/interfaces/opt_group_element.h +34 -0
  325. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.c +36 -0
  326. data/vendor/lexbor/source/lexbor/html/interfaces/option_element.h +34 -0
  327. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.c +36 -0
  328. data/vendor/lexbor/source/lexbor/html/interfaces/output_element.h +34 -0
  329. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.c +36 -0
  330. data/vendor/lexbor/source/lexbor/html/interfaces/paragraph_element.h +34 -0
  331. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.c +36 -0
  332. data/vendor/lexbor/source/lexbor/html/interfaces/param_element.h +34 -0
  333. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.c +36 -0
  334. data/vendor/lexbor/source/lexbor/html/interfaces/picture_element.h +34 -0
  335. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.c +36 -0
  336. data/vendor/lexbor/source/lexbor/html/interfaces/pre_element.h +34 -0
  337. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.c +36 -0
  338. data/vendor/lexbor/source/lexbor/html/interfaces/progress_element.h +34 -0
  339. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.c +36 -0
  340. data/vendor/lexbor/source/lexbor/html/interfaces/quote_element.h +34 -0
  341. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.c +36 -0
  342. data/vendor/lexbor/source/lexbor/html/interfaces/script_element.h +34 -0
  343. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +36 -0
  344. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.h +34 -0
  345. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.c +36 -0
  346. data/vendor/lexbor/source/lexbor/html/interfaces/slot_element.h +34 -0
  347. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.c +36 -0
  348. data/vendor/lexbor/source/lexbor/html/interfaces/source_element.h +34 -0
  349. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.c +36 -0
  350. data/vendor/lexbor/source/lexbor/html/interfaces/span_element.h +34 -0
  351. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.c +36 -0
  352. data/vendor/lexbor/source/lexbor/html/interfaces/style_element.h +34 -0
  353. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.c +36 -0
  354. data/vendor/lexbor/source/lexbor/html/interfaces/table_caption_element.h +34 -0
  355. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.c +36 -0
  356. data/vendor/lexbor/source/lexbor/html/interfaces/table_cell_element.h +34 -0
  357. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.c +36 -0
  358. data/vendor/lexbor/source/lexbor/html/interfaces/table_col_element.h +34 -0
  359. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.c +36 -0
  360. data/vendor/lexbor/source/lexbor/html/interfaces/table_element.h +34 -0
  361. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.c +36 -0
  362. data/vendor/lexbor/source/lexbor/html/interfaces/table_row_element.h +34 -0
  363. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.c +36 -0
  364. data/vendor/lexbor/source/lexbor/html/interfaces/table_section_element.h +34 -0
  365. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.c +46 -0
  366. data/vendor/lexbor/source/lexbor/html/interfaces/template_element.h +38 -0
  367. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.c +36 -0
  368. data/vendor/lexbor/source/lexbor/html/interfaces/text_area_element.h +34 -0
  369. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.c +36 -0
  370. data/vendor/lexbor/source/lexbor/html/interfaces/time_element.h +34 -0
  371. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.c +133 -0
  372. data/vendor/lexbor/source/lexbor/html/interfaces/title_element.h +42 -0
  373. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.c +36 -0
  374. data/vendor/lexbor/source/lexbor/html/interfaces/track_element.h +34 -0
  375. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.c +36 -0
  376. data/vendor/lexbor/source/lexbor/html/interfaces/u_list_element.h +34 -0
  377. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.c +36 -0
  378. data/vendor/lexbor/source/lexbor/html/interfaces/unknown_element.h +34 -0
  379. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.c +36 -0
  380. data/vendor/lexbor/source/lexbor/html/interfaces/video_element.h +34 -0
  381. data/vendor/lexbor/source/lexbor/html/interfaces/window.c +36 -0
  382. data/vendor/lexbor/source/lexbor/html/interfaces/window.h +34 -0
  383. data/vendor/lexbor/source/lexbor/html/node.c +14 -0
  384. data/vendor/lexbor/source/lexbor/html/node.h +67 -0
  385. data/vendor/lexbor/source/lexbor/html/parser.c +469 -0
  386. data/vendor/lexbor/source/lexbor/html/parser.h +170 -0
  387. data/vendor/lexbor/source/lexbor/html/serialize.c +1510 -0
  388. data/vendor/lexbor/source/lexbor/html/serialize.h +93 -0
  389. data/vendor/lexbor/source/lexbor/html/tag.h +103 -0
  390. data/vendor/lexbor/source/lexbor/html/tag_res.h +2262 -0
  391. data/vendor/lexbor/source/lexbor/html/token.c +386 -0
  392. data/vendor/lexbor/source/lexbor/html/token.h +130 -0
  393. data/vendor/lexbor/source/lexbor/html/token_attr.c +44 -0
  394. data/vendor/lexbor/source/lexbor/html/token_attr.h +67 -0
  395. data/vendor/lexbor/source/lexbor/html/tokenizer/error.c +28 -0
  396. data/vendor/lexbor/source/lexbor/html/tokenizer/error.h +141 -0
  397. data/vendor/lexbor/source/lexbor/html/tokenizer/res.h +4956 -0
  398. data/vendor/lexbor/source/lexbor/html/tokenizer/state.c +2171 -0
  399. data/vendor/lexbor/source/lexbor/html/tokenizer/state.h +225 -0
  400. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.c +489 -0
  401. data/vendor/lexbor/source/lexbor/html/tokenizer/state_comment.h +27 -0
  402. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.c +1654 -0
  403. data/vendor/lexbor/source/lexbor/html/tokenizer/state_doctype.h +27 -0
  404. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.c +303 -0
  405. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rawtext.h +32 -0
  406. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.c +311 -0
  407. data/vendor/lexbor/source/lexbor/html/tokenizer/state_rcdata.h +32 -0
  408. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.c +1209 -0
  409. data/vendor/lexbor/source/lexbor/html/tokenizer/state_script.h +32 -0
  410. data/vendor/lexbor/source/lexbor/html/tokenizer.c +499 -0
  411. data/vendor/lexbor/source/lexbor/html/tokenizer.h +343 -0
  412. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.c +241 -0
  413. data/vendor/lexbor/source/lexbor/html/tree/active_formatting.h +117 -0
  414. data/vendor/lexbor/source/lexbor/html/tree/error.c +26 -0
  415. data/vendor/lexbor/source/lexbor/html/tree/error.h +114 -0
  416. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_body.c +62 -0
  417. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_after_frameset.c +63 -0
  418. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_body.c +82 -0
  419. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_frameset.c +88 -0
  420. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/after_head.c +222 -0
  421. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_head.c +144 -0
  422. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/before_html.c +166 -0
  423. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/foreign_content.c +358 -0
  424. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1974 -0
  425. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_caption.c +158 -0
  426. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_cell.c +187 -0
  427. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_column_group.c +194 -0
  428. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_frameset.c +149 -0
  429. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head.c +374 -0
  430. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_head_noscript.c +121 -0
  431. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_row.c +211 -0
  432. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select.c +341 -0
  433. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_select_in_table.c +115 -0
  434. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table.c +451 -0
  435. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_body.c +208 -0
  436. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_table_text.c +127 -0
  437. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_template.c +189 -0
  438. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/initial.c +411 -0
  439. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/text.c +61 -0
  440. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode.h +135 -0
  441. data/vendor/lexbor/source/lexbor/html/tree/open_elements.c +251 -0
  442. data/vendor/lexbor/source/lexbor/html/tree/open_elements.h +105 -0
  443. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.c +10 -0
  444. data/vendor/lexbor/source/lexbor/html/tree/template_insertion.h +100 -0
  445. data/vendor/lexbor/source/lexbor/html/tree.c +1726 -0
  446. data/vendor/lexbor/source/lexbor/html/tree.h +431 -0
  447. data/vendor/lexbor/source/lexbor/html/tree_res.h +111 -0
  448. data/vendor/lexbor/source/lexbor/ns/base.h +32 -0
  449. data/vendor/lexbor/source/lexbor/ns/config.cmake +2 -0
  450. data/vendor/lexbor/source/lexbor/ns/const.h +37 -0
  451. data/vendor/lexbor/source/lexbor/ns/ns.c +154 -0
  452. data/vendor/lexbor/source/lexbor/ns/ns.h +66 -0
  453. data/vendor/lexbor/source/lexbor/ns/res.h +97 -0
  454. data/vendor/lexbor/source/lexbor/ports/posix/config.cmake +11 -0
  455. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/fs.c +236 -0
  456. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +33 -0
  457. data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/perf.c +158 -0
  458. data/vendor/lexbor/source/lexbor/ports/windows_nt/config.cmake +18 -0
  459. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/fs.c +239 -0
  460. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +33 -0
  461. data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/perf.c +81 -0
  462. data/vendor/lexbor/source/lexbor/selectors/base.h +30 -0
  463. data/vendor/lexbor/source/lexbor/selectors/config.cmake +2 -0
  464. data/vendor/lexbor/source/lexbor/selectors/selectors.c +1591 -0
  465. data/vendor/lexbor/source/lexbor/selectors/selectors.h +71 -0
  466. data/vendor/lexbor/source/lexbor/tag/base.h +32 -0
  467. data/vendor/lexbor/source/lexbor/tag/config.cmake +2 -0
  468. data/vendor/lexbor/source/lexbor/tag/const.h +225 -0
  469. data/vendor/lexbor/source/lexbor/tag/res.h +562 -0
  470. data/vendor/lexbor/source/lexbor/tag/tag.c +144 -0
  471. data/vendor/lexbor/source/lexbor/tag/tag.h +123 -0
  472. data/vendor/lexbor/source/lexbor/utils/base.h +32 -0
  473. data/vendor/lexbor/source/lexbor/utils/config.cmake +2 -0
  474. data/vendor/lexbor/source/lexbor/utils/http.c +534 -0
  475. data/vendor/lexbor/source/lexbor/utils/http.h +90 -0
  476. data/vendor/lexbor/source/lexbor/utils/utils.h +15 -0
  477. data/vendor/lexbor/source/lexbor/utils/warc.c +817 -0
  478. data/vendor/lexbor/source/lexbor/utils/warc.h +126 -0
  479. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +231 -0
  480. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +21 -0
  481. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +26 -0
  482. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +49 -0
  483. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +54 -0
  484. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +36 -0
  485. data/vendor/lexbor/version +1 -0
  486. metadata +542 -0
@@ -0,0 +1,2603 @@
1
+ /*
2
+ * Copyright (C) 2018-2020 Alexander Borisov
3
+ *
4
+ * Author: Alexander Borisov <borisov@lexbor.com>
5
+ */
6
+
7
+ #include <string.h>
8
+ #include <float.h>
9
+
10
+ #include "lexbor/core/utils.h"
11
+ #include "lexbor/core/strtod.h"
12
+
13
+ #include "lexbor/css/syntax/state.h"
14
+ #include "lexbor/css/syntax/tokenizer/error.h"
15
+
16
+ #define LXB_CSS_SYNTAX_RES_NAME_MAP
17
+ #include "lexbor/css/syntax/res.h"
18
+
19
+ #define LEXBOR_STR_RES_MAP_HEX
20
+ #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
21
+ #include "lexbor/core/str_res.h"
22
+
23
+
24
/*
 * Ask the tokenizer for the next input chunk; _data and _end are passed by
 * address to lxb_css_syntax_tokenizer_next_chunk().
 *
 * NOTE: on a non-OK status this macro executes `return NULL;` in the
 * enclosing function, so it may only be used in functions returning a pointer.
 */
#define LXB_CSS_SYNTAX_NEXT_CHUNK(_tkz, _status, _data, _end)                 \
    do {                                                                      \
        (_status) = lxb_css_syntax_tokenizer_next_chunk((_tkz), &(_data),     \
                                                        &(_end));             \
        if ((_status) != LXB_STATUS_OK) {                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)


/*
 * Append _length bytes starting at _begin to the tokenizer string buffer.
 *
 * NOTE: on a non-OK status this macro executes `return NULL;` in the
 * enclosing function.
 */
#define LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, _length)         \
    do {                                                                      \
        (_status) = lxb_css_syntax_string_append((_tkz), (_begin),            \
                                                 (_length));                  \
        if ((_status) != LXB_STATUS_OK) {                                     \
            return NULL;                                                      \
        }                                                                     \
    }                                                                         \
    while (0)

/*
 * Append the byte range [_begin, _end) to the tokenizer string buffer.
 * _begin is expanded twice, so it must not have side effects.
 */
#define LXB_CSS_SYNTAX_STR_APPEND(_tkz, _status, _begin, _end)                \
    LXB_CSS_SYNTAX_STR_APPEND_LEN(_tkz, _status, _begin, ((_end) - (_begin)))
45
+
46
+
47
+ lxb_status_t
48
+ lxb_css_syntax_tokenizer_next_chunk(lxb_css_syntax_tokenizer_t *tkz,
49
+ const lxb_char_t **data, const lxb_char_t **end);
50
+
51
+ lxb_status_t
52
+ lxb_css_syntax_state_tokens_realloc(lxb_css_syntax_tokenizer_t *tkz);
53
+
54
+
55
+ static const lxb_char_t *
56
+ lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
57
+ lxb_css_syntax_token_t *token,
58
+ const lxb_char_t *data,
59
+ const lxb_char_t *end);
60
+
61
+ static const lxb_char_t *
62
+ lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
63
+ lxb_css_syntax_token_t *token,
64
+ lxb_char_t *buf_start, lxb_char_t *buf_end,
65
+ const lxb_char_t *data, const lxb_char_t *end);
66
+
67
+ static const lxb_char_t *
68
+ lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
69
+ lxb_css_syntax_token_t *token,
70
+ const lxb_char_t *data,
71
+ const lxb_char_t *end);
72
+
73
+ static const lxb_char_t *
74
+ lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
75
+ lxb_css_syntax_token_t *token,
76
+ const lxb_char_t *data, const lxb_char_t *end);
77
+
78
+ static const lxb_char_t *
79
+ lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
80
+ const lxb_char_t *data, const lxb_char_t *end);
81
+
82
+ static const lxb_char_t *
83
+ lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
84
+ const lxb_char_t *data, const lxb_char_t *end);
85
+
86
+ static const lxb_char_t *
87
+ lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
88
+ const lxb_char_t *data, const lxb_char_t **end);
89
+
90
+ static const lxb_char_t *
91
+ lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
92
+ const lxb_char_t *data, const lxb_char_t **end);
93
+
94
+
95
+ lxb_inline lxb_status_t
96
+ lxb_css_syntax_string_realloc(lxb_css_syntax_tokenizer_t *tkz, size_t upto)
97
+ {
98
+ size_t len = tkz->pos - tkz->start;
99
+ size_t size = (tkz->end - tkz->start) + upto;
100
+
101
+ lxb_char_t *tmp = lexbor_realloc(tkz->start, size);
102
+ if (tmp == NULL) {
103
+ tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
104
+ return tkz->status;
105
+ }
106
+
107
+ tkz->start = tmp;
108
+ tkz->pos = tmp + len;
109
+ tkz->end = tmp + size;
110
+
111
+ return LXB_STATUS_OK;
112
+ }
113
+
114
+ lxb_inline lxb_status_t
115
+ lxb_css_syntax_string_append(lxb_css_syntax_tokenizer_t *tkz,
116
+ const lxb_char_t *data, size_t length)
117
+ {
118
+ if ((size_t) (tkz->end - tkz->pos) <= length) {
119
+ if (lxb_css_syntax_string_realloc(tkz, length + 1024) != LXB_STATUS_OK) {
120
+ return tkz->status;
121
+ }
122
+ }
123
+
124
+ memcpy(tkz->pos, data, length);
125
+
126
+ tkz->pos += length;
127
+
128
+ return LXB_STATUS_OK;
129
+ }
130
+
131
+ lxb_inline lxb_status_t
132
+ lxb_css_syntax_state_string_term(lxb_css_syntax_tokenizer_t *tkz)
133
+ {
134
+ if (tkz->pos >= tkz->end) {
135
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
136
+ return tkz->status;
137
+ }
138
+ }
139
+
140
+ *tkz->pos = 0x00;
141
+
142
+ return LXB_STATUS_OK;
143
+ }
144
+
145
+
146
+ lxb_inline const lxb_char_t *
147
+ lxb_css_syntax_state_string_set(lxb_css_syntax_tokenizer_t *tkz,
148
+ lxb_css_syntax_token_t *token,
149
+ const lxb_char_t *data)
150
+ {
151
+ if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
152
+ return NULL;
153
+ }
154
+
155
+ lxb_css_syntax_token_string(token)->data = tkz->start;
156
+ lxb_css_syntax_token_string(token)->length = tkz->pos - tkz->start;
157
+
158
+ tkz->pos = tkz->start;
159
+
160
+ return data;
161
+ }
162
+
163
+ lxb_inline const lxb_char_t *
164
+ lxb_css_syntax_state_dimension_set(lxb_css_syntax_tokenizer_t *tkz,
165
+ lxb_css_syntax_token_t *token,
166
+ const lxb_char_t *data)
167
+ {
168
+ if(lxb_css_syntax_state_string_term(tkz) != LXB_STATUS_OK) {
169
+ return NULL;
170
+ }
171
+
172
+ lxb_css_syntax_token_dimension_string(token)->data = tkz->start;
173
+ lxb_css_syntax_token_dimension_string(token)->length = tkz->pos - tkz->start;
174
+
175
+ tkz->pos = tkz->start;
176
+
177
+ return data;
178
+ }
179
+
180
+ static lxb_css_syntax_token_t *
181
+ lxb_css_syntax_tokenizer_token_append(lxb_css_syntax_tokenizer_t *tkz)
182
+ {
183
+ if (tkz->prepared == NULL) {
184
+ if (tkz->last >= tkz->tokens_end) {
185
+ tkz->status = lxb_css_syntax_state_tokens_realloc(tkz);
186
+ if (tkz->status != LXB_STATUS_OK) {
187
+ return NULL;
188
+ }
189
+ }
190
+
191
+ tkz->prepared = tkz->last;
192
+ tkz->prepared->cloned = false;
193
+
194
+ return tkz->last++;
195
+ }
196
+
197
+ lxb_css_syntax_token_t *first;
198
+ size_t length = tkz->last - tkz->prepared;
199
+
200
+ if ((tkz->last + length) >= tkz->tokens_end) {
201
+ tkz->status = lxb_css_syntax_state_tokens_realloc(tkz);
202
+ if (tkz->status != LXB_STATUS_OK) {
203
+ return NULL;
204
+ }
205
+ }
206
+
207
+ first = tkz->prepared;
208
+
209
+ memmove(&first[1], first, length * sizeof(lxb_css_syntax_token_t));
210
+
211
+ tkz->last++;
212
+ first->cloned = false;
213
+
214
+ return first;
215
+ }
216
+
217
+ lxb_status_t
218
+ lxb_css_syntax_state_tokens_realloc(lxb_css_syntax_tokenizer_t *tkz)
219
+ {
220
+ lxb_css_syntax_token_t *tokens;
221
+
222
+ static const unsigned length = 64;
223
+ size_t new_length = (tkz->tokens_end - tkz->tokens_begin) + length;
224
+
225
+ tokens = lexbor_calloc(new_length, sizeof(lxb_css_syntax_token_t));
226
+ if (tokens == NULL) {
227
+ return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
228
+ }
229
+
230
+ memcpy(tokens, tkz->token, (tkz->last - tkz->token)
231
+ * sizeof(lxb_css_syntax_token_t));
232
+
233
+ if (tkz->prepared != NULL) {
234
+ tkz->prepared = tokens + (tkz->prepared - tkz->token);
235
+ }
236
+
237
+ tkz->token = tokens;
238
+ tkz->last = tokens + (tkz->last - tkz->tokens_begin);
239
+
240
+ lexbor_free(tkz->tokens_begin);
241
+
242
+ tkz->tokens_begin = tokens;
243
+ tkz->tokens_end = tokens + new_length;
244
+
245
+ return LXB_STATUS_OK;
246
+ }
247
+
248
+ /*
249
+ * Delim
250
+ */
251
+ lxb_inline lxb_css_syntax_token_t *
252
+ lxb_css_syntax_list_append_delim(lxb_css_syntax_tokenizer_t *tkz,
253
+ const lxb_char_t *data,
254
+ const lxb_char_t *end, lxb_char_t ch)
255
+ {
256
+ lxb_css_syntax_token_t *delim;
257
+
258
+ delim = lxb_css_syntax_tokenizer_token_append(tkz);
259
+ if (delim == NULL) {
260
+ return NULL;
261
+ }
262
+
263
+ delim->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
264
+
265
+ lxb_css_syntax_token_base(delim)->begin = data;
266
+ lxb_css_syntax_token_base(delim)->end = end;
267
+ lxb_css_syntax_token_delim(delim)->character = ch;
268
+
269
+ return delim;
270
+ }
271
+
272
+ lxb_inline void
273
+ lxb_css_syntax_state_delim_set(lxb_css_syntax_token_t *token, const lxb_char_t *begin,
274
+ const lxb_char_t *end, lxb_char_t ch)
275
+ {
276
+ lxb_css_syntax_token_delim(token)->character = ch;
277
+ lxb_css_syntax_token_base(token)->begin = begin;
278
+ lxb_css_syntax_token_base(token)->end = end;
279
+
280
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
281
+ }
282
+
283
+ const lxb_char_t *
284
+ lxb_css_syntax_state_delim(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
285
+ const lxb_char_t *data, const lxb_char_t *end)
286
+ {
287
+ lxb_css_syntax_state_delim_set(token, data, data + 1, *data);
288
+
289
+ return data + 1;
290
+ }
291
+
292
+ /*
293
+ * Comment
294
+ */
295
+ const lxb_char_t *
296
+ lxb_css_syntax_state_comment(lxb_css_syntax_tokenizer_t *tkz,
297
+ lxb_css_syntax_token_t *token,
298
+ const lxb_char_t *data, const lxb_char_t *end)
299
+ {
300
+ lxb_status_t status;
301
+ const lxb_char_t *begin;
302
+
303
+ lxb_css_syntax_token_base(token)->begin = data;
304
+
305
+ /* Skip forward slash (/) */
306
+ data++;
307
+
308
+ if (data >= end) {
309
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
310
+ if (data >= end) {
311
+ goto delim;
312
+ }
313
+ }
314
+
315
+ /* U+002A ASTERISK (*) */
316
+ if (*data != 0x2A) {
317
+ goto delim;
318
+ }
319
+
320
+ begin = data + 1;
321
+
322
+ do {
323
+ data++;
324
+
325
+ if (data >= end) {
326
+ if (begin < data) {
327
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
328
+ }
329
+
330
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
331
+ if (data >= end) {
332
+ goto error;
333
+ }
334
+
335
+ begin = data;
336
+ }
337
+
338
+ switch (*data) {
339
+ case 0x00:
340
+ if (begin < data) {
341
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
342
+ }
343
+
344
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
345
+ lexbor_str_res_ansi_replacement_character,
346
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
347
+ begin = data + 1;
348
+ break;
349
+
350
+ case 0x0D:
351
+ data++;
352
+
353
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
354
+
355
+ tkz->pos[-1] = '\n';
356
+
357
+ if (data >= end) {
358
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
359
+ if (data >= end) {
360
+ goto error;
361
+ }
362
+ }
363
+
364
+ if (*data != 0x0A) {
365
+ data--;
366
+ }
367
+
368
+ begin = data + 1;
369
+ break;
370
+
371
+ case 0x0C:
372
+ if (begin < data) {
373
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
374
+ }
375
+
376
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
377
+ (lxb_char_t *) "\n", 1);
378
+ begin = data + 1;
379
+ break;
380
+
381
+ /* U+002A ASTERISK (*) */
382
+ case 0x2A:
383
+ data++;
384
+
385
+ if (data >= end) {
386
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
387
+
388
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
389
+ if (data >= end) {
390
+ goto error;
391
+ }
392
+
393
+ if (*data == 0x2F) {
394
+ tkz->pos--;
395
+ *tkz->pos = 0x00;
396
+
397
+ data++;
398
+
399
+ goto done;
400
+ }
401
+
402
+ begin = data;
403
+ }
404
+
405
+ /* U+002F Forward slash (/) */
406
+ if (*data == 0x2F) {
407
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, (data - 1));
408
+
409
+ data++;
410
+
411
+ goto done;
412
+ }
413
+
414
+ data--;
415
+ break;
416
+ }
417
+ }
418
+ while (true);
419
+
420
+ done:
421
+
422
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
423
+
424
+ lxb_css_syntax_token_base(token)->end = data;
425
+ return lxb_css_syntax_state_string_set(tkz, token, data);
426
+
427
+ delim:
428
+
429
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
430
+
431
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
432
+ lxb_css_syntax_token_delim(token)->character = '/';
433
+
434
+ return data;
435
+
436
+ error:
437
+
438
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMENT;
439
+
440
+ lxb_css_syntax_token_base(token)->end = data;
441
+
442
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
443
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINCO);
444
+
445
+ return lxb_css_syntax_state_string_set(tkz, token, data);
446
+ }
447
+
448
+ /*
449
+ * Whitespace
450
+ */
451
+ const lxb_char_t *
452
+ lxb_css_syntax_state_whitespace(lxb_css_syntax_tokenizer_t *tkz,
453
+ lxb_css_syntax_token_t *token,
454
+ const lxb_char_t *data, const lxb_char_t *end)
455
+ {
456
+ lxb_status_t status;
457
+ const lxb_char_t *begin;
458
+
459
+ token->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
460
+
461
+ lxb_css_syntax_token_base(token)->begin = data;
462
+
463
+ begin = data;
464
+
465
+ do {
466
+ switch (*data) {
467
+ case 0x0D:
468
+ data++;
469
+
470
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
471
+
472
+ tkz->pos[-1] = '\n';
473
+
474
+ if (data >= end) {
475
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
476
+ if (data >= end) {
477
+ goto done;
478
+ }
479
+ }
480
+
481
+ if (*data != 0x0A) {
482
+ data--;
483
+ }
484
+
485
+ begin = data + 1;
486
+ break;
487
+
488
+ case 0x0C:
489
+ if (begin < data) {
490
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
491
+ }
492
+
493
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
494
+ (const lxb_char_t *) "\n", 1);
495
+ begin = data + 1;
496
+ break;
497
+
498
+ case 0x09:
499
+ case 0x20:
500
+ case 0x0A:
501
+ break;
502
+
503
+ default:
504
+ if (begin < data) {
505
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
506
+ }
507
+
508
+ lxb_css_syntax_token_base(token)->end = data;
509
+
510
+ return lxb_css_syntax_state_string_set(tkz, token, data);
511
+ }
512
+
513
+ data++;
514
+
515
+ if (data >= end) {
516
+ if (begin < data) {
517
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
518
+ }
519
+
520
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
521
+ if (data >= end) {
522
+ break;
523
+ }
524
+
525
+ begin = data;
526
+ }
527
+ }
528
+ while (true);
529
+
530
+ done:
531
+
532
+ lxb_css_syntax_token_base(token)->end = data;
533
+
534
+ return lxb_css_syntax_state_string_set(tkz, token, data);
535
+ }
536
+
537
+ /*
538
+ * String token for U+0022 Quotation Mark (") and U+0027 Apostrophe (')
539
+ */
540
+ const lxb_char_t *
541
+ lxb_css_syntax_state_string(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
542
+ const lxb_char_t *data, const lxb_char_t *end)
543
+ {
544
+ lxb_char_t mark;
545
+ lxb_status_t status;
546
+ const lxb_char_t *begin;
547
+
548
+ lxb_css_syntax_token_base(token)->begin = data;
549
+
550
+ mark = *data++;
551
+ begin = data;
552
+
553
+ for (;; data++) {
554
+ if (data >= end) {
555
+ if (begin < data) {
556
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
557
+ }
558
+
559
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
560
+ if (data >= end) {
561
+ goto error;
562
+ }
563
+
564
+ begin = data;
565
+ }
566
+
567
+ switch (*data) {
568
+ case 0x00:
569
+ if (begin < data) {
570
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
571
+ }
572
+
573
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
574
+ lexbor_str_res_ansi_replacement_character,
575
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
576
+ begin = data + 1;
577
+ break;
578
+
579
+ /*
580
+ * U+000A LINE FEED
581
+ * U+000D CARRIAGE RETURN
582
+ * U+000C FORM FEED
583
+ */
584
+ case 0x0A:
585
+ case 0x0D:
586
+ case 0x0C:
587
+ if (begin < data) {
588
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
589
+ }
590
+
591
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
592
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_NEINST);
593
+
594
+ token->type = LXB_CSS_SYNTAX_TOKEN_BAD_STRING;
595
+
596
+ lxb_css_syntax_token_base(token)->end = data;
597
+
598
+ return lxb_css_syntax_state_string_set(tkz, token, data);
599
+
600
+ /* U+005C REVERSE SOLIDUS (\) */
601
+ case 0x5C:
602
+ if (begin < data) {
603
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
604
+ }
605
+
606
+ data++;
607
+
608
+ if (data >= end) {
609
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
610
+ if (data >= end) {
611
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
612
+ (const lxb_char_t *) "\\", 1);
613
+ goto error;
614
+ }
615
+ }
616
+
617
+ data = lxb_css_syntax_state_escaped_string(tkz, data, &end);
618
+ if (data == NULL) {
619
+ return NULL;
620
+ }
621
+
622
+ begin = data;
623
+
624
+ data--;
625
+ break;
626
+
627
+ default:
628
+ /* '"' or '\'' */
629
+ if (*data == mark) {
630
+ if (begin < data) {
631
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
632
+ }
633
+
634
+ token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
635
+
636
+ lxb_css_syntax_token_base(token)->end = ++data;
637
+
638
+ return lxb_css_syntax_state_string_set(tkz, token, data);
639
+ }
640
+
641
+ break;
642
+ }
643
+ }
644
+
645
+ return data;
646
+
647
+ error:
648
+
649
+ lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, NULL,
650
+ LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINST);
651
+
652
+ token->type = LXB_CSS_SYNTAX_TOKEN_STRING;
653
+
654
+ lxb_css_syntax_token_base(token)->end = data;
655
+
656
+ return lxb_css_syntax_state_string_set(tkz, token, data);
657
+ }
658
+
659
+ /*
660
+ * U+0023 NUMBER SIGN (#)
661
+ */
662
+ const lxb_char_t *
663
+ lxb_css_syntax_state_hash(lxb_css_syntax_tokenizer_t *tkz,
664
+ lxb_css_syntax_token_t *token, const lxb_char_t *data,
665
+ const lxb_char_t *end)
666
+ {
667
+ lxb_char_t ch;
668
+ lxb_status_t status;
669
+ const lxb_char_t *begin;
670
+ lxb_css_syntax_token_t *delim;
671
+
672
+ lxb_css_syntax_token_base(token)->begin = data++;
673
+
674
+ if (data >= end) {
675
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
676
+ if (data >= end) {
677
+ goto delim;
678
+ }
679
+ }
680
+
681
+ if (lxb_css_syntax_res_name_map[*data] == 0x00) {
682
+ if (*data == 0x00) {
683
+ goto hash;
684
+ }
685
+
686
+ /* U+005C REVERSE SOLIDUS (\) */
687
+ if (*data != 0x5C) {
688
+ goto delim;
689
+ }
690
+
691
+ begin = data++;
692
+
693
+ if (data >= end) {
694
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
695
+ if (data >= end) {
696
+ goto push_delim;
697
+ }
698
+ }
699
+
700
+ ch = *data;
701
+
702
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
703
+ goto push_delim;
704
+ }
705
+
706
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
707
+ if (data == NULL) {
708
+ return NULL;
709
+ }
710
+ }
711
+
712
+ hash:
713
+
714
+ token->type = LXB_CSS_SYNTAX_TOKEN_HASH;
715
+
716
+ return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
717
+
718
+ push_delim:
719
+
720
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '\\');
721
+ if (delim == NULL) {
722
+ return NULL;
723
+ }
724
+
725
+ delim:
726
+
727
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
728
+
729
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
730
+ lxb_css_syntax_token_delim(token)->character = '#';
731
+
732
+ return data;
733
+ }
734
+
735
+ /*
736
+ * U+0028 LEFT PARENTHESIS (()
737
+ */
738
+ const lxb_char_t *
739
+ lxb_css_syntax_state_lparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
740
+ const lxb_char_t *data, const lxb_char_t *end)
741
+ {
742
+ token->type = LXB_CSS_SYNTAX_TOKEN_L_PARENTHESIS;
743
+
744
+ lxb_css_syntax_token_base(token)->begin = data;
745
+ lxb_css_syntax_token_base(token)->end = ++data;
746
+
747
+ return data;
748
+ }
749
+
750
+ /*
751
+ * U+0029 RIGHT PARENTHESIS ())
752
+ */
753
+ const lxb_char_t *
754
+ lxb_css_syntax_state_rparenthesis(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
755
+ const lxb_char_t *data, const lxb_char_t *end)
756
+ {
757
+ token->type = LXB_CSS_SYNTAX_TOKEN_R_PARENTHESIS;
758
+
759
+ lxb_css_syntax_token_base(token)->begin = data;
760
+ lxb_css_syntax_token_base(token)->end = ++data;
761
+
762
+ return data;
763
+ }
764
+
765
+ /*
766
+ * U+002B PLUS SIGN (+)
767
+ */
768
+ const lxb_char_t *
769
+ lxb_css_syntax_state_plus(lxb_css_syntax_tokenizer_t *tkz,
770
+ lxb_css_syntax_token_t *token,
771
+ const lxb_char_t *data, const lxb_char_t *end)
772
+ {
773
+ lxb_status_t status;
774
+
775
+ lxb_css_syntax_token_base(token)->begin = data++;
776
+
777
+ if (data >= end) {
778
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
779
+ if (data >= end) {
780
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
781
+
782
+ lxb_css_syntax_token_base(token)->end = data;
783
+ lxb_css_syntax_token_delim(token)->character = '+';
784
+
785
+ return data;
786
+ }
787
+ }
788
+
789
+ return lxb_css_syntax_state_plus_process(tkz, token, data, end);
790
+ }
791
+
792
+ const lxb_char_t *
793
+ lxb_css_syntax_state_plus_process(lxb_css_syntax_tokenizer_t *tkz,
794
+ lxb_css_syntax_token_t *token,
795
+ const lxb_char_t *data, const lxb_char_t *end)
796
+ {
797
+ lxb_status_t status;
798
+ const lxb_char_t *begin;
799
+ lxb_css_syntax_token_t *delim;
800
+
801
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
802
+ if (*data >= 0x30 && *data <= 0x39) {
803
+ lxb_css_syntax_token_number(token)->have_sign = true;
804
+ return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
805
+ }
806
+
807
+ /* U+002E FULL STOP (.) */
808
+ if (*data == 0x2E) {
809
+ begin = data++;
810
+
811
+ if (data == end) {
812
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
813
+
814
+ if (data >= end || *data < 0x30 || *data > 0x39) {
815
+ goto push_delim;
816
+ }
817
+
818
+ lxb_css_syntax_token_number(token)->have_sign = true;
819
+
820
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
821
+ tkz->buffer + sizeof(tkz->buffer),
822
+ data, end);
823
+ }
824
+
825
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
826
+ if (*data >= 0x30 && *data <= 0x39) {
827
+ lxb_css_syntax_token_number(token)->have_sign = true;
828
+
829
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
830
+ tkz->buffer + sizeof(tkz->buffer),
831
+ data, end);
832
+ }
833
+
834
+ push_delim:
835
+
836
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '.');
837
+ if (delim == NULL) {
838
+ return NULL;
839
+ }
840
+ }
841
+
842
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
843
+
844
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
845
+ lxb_css_syntax_token_delim(token)->character = '+';
846
+
847
+ return data;
848
+ }
849
+
850
+ /*
851
+ * U+002C COMMA (,)
852
+ */
853
+ const lxb_char_t *
854
+ lxb_css_syntax_state_comma(lxb_css_syntax_tokenizer_t *tkz,
855
+ lxb_css_syntax_token_t *token,
856
+ const lxb_char_t *data, const lxb_char_t *end)
857
+ {
858
+ token->type = LXB_CSS_SYNTAX_TOKEN_COMMA;
859
+
860
+ lxb_css_syntax_token_base(token)->begin = data;
861
+ lxb_css_syntax_token_base(token)->end = ++data;
862
+
863
+ return data;
864
+ }
865
+
866
+ /*
867
+ * U+002D HYPHEN-MINUS (-)
868
+ */
869
+ const lxb_char_t *
870
+ lxb_css_syntax_state_minus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
871
+ const lxb_char_t *data, const lxb_char_t *end)
872
+ {
873
+ lxb_status_t status;
874
+
875
+ lxb_css_syntax_token_base(token)->begin = data++;
876
+
877
+ if (data >= end) {
878
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
879
+ if (data >= end) {
880
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
881
+
882
+ lxb_css_syntax_token_base(token)->end = data;
883
+ lxb_css_syntax_token_delim(token)->character = '-';
884
+
885
+ return data;
886
+ }
887
+ }
888
+
889
+ return lxb_css_syntax_state_minus_process(tkz, token, data, end);
890
+ }
891
+
892
+ const lxb_char_t *
893
+ lxb_css_syntax_state_minus_process(lxb_css_syntax_tokenizer_t *tkz,
894
+ lxb_css_syntax_token_t *token,
895
+ const lxb_char_t *data, const lxb_char_t *end)
896
+ {
897
+ lxb_char_t ch;
898
+ lxb_status_t status;
899
+ const lxb_char_t *begin, *second;
900
+ lxb_css_syntax_token_t *delim;
901
+ lxb_css_syntax_token_number_t *number;
902
+
903
+ unsigned minuses_len = 1;
904
+ static const lxb_char_t minuses[3] = "---";
905
+
906
+ /* Check for <number-token> */
907
+
908
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
909
+ if (*data >= 0x30 && *data <= 0x39) {
910
+ data = lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
911
+
912
+ number = lxb_css_syntax_token_number(token);
913
+ number->num = -number->num;
914
+
915
+ lxb_css_syntax_token_number(token)->have_sign = true;
916
+
917
+ return data;
918
+ }
919
+
920
+ /* U+002E FULL STOP (.) */
921
+ if (*data == 0x2E) {
922
+ begin = data++;
923
+
924
+ if (data == end) {
925
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
926
+ if (data >= end) {
927
+ goto push_delim;
928
+ }
929
+ }
930
+
931
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
932
+ if (*data >= 0x30 && *data <= 0x39) {
933
+ data = lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
934
+ tkz->buffer + sizeof(tkz->buffer),
935
+ data, end);
936
+
937
+ number = lxb_css_syntax_token_number(token);
938
+ number->num = -number->num;
939
+
940
+ lxb_css_syntax_token_number(token)->have_sign = true;
941
+
942
+ return data;
943
+ }
944
+
945
+ push_delim:
946
+
947
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '.');
948
+ if (delim == NULL) {
949
+ return NULL;
950
+ }
951
+
952
+ goto delim;
953
+ }
954
+
955
+ second = data;
956
+
957
+ /* U+002D HYPHEN-MINUS (-) */
958
+ if (*data == 0x2D) {
959
+ data++;
960
+
961
+ /* Check for <CDC-token> */
962
+
963
+ if (data == end) {
964
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
965
+ if (data >= end) {
966
+ delim = lxb_css_syntax_list_append_delim(tkz, second,
967
+ second + 1, '-');
968
+ if (delim == NULL) {
969
+ return NULL;
970
+ }
971
+
972
+ goto delim;
973
+ }
974
+ }
975
+
976
+ if (*data == 0x2D) {
977
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 3);
978
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, ++data, end);
979
+ }
980
+ else if (*data == 0x3E) {
981
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDC;
982
+
983
+ lxb_css_syntax_token_base(token)->end = ++data;
984
+
985
+ return data;
986
+ }
987
+
988
+ minuses_len++;
989
+ }
990
+
991
+ /* Check for <ident-token> */
992
+
993
+ if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
994
+ || *data == 0x00)
995
+ {
996
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
997
+
998
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
999
+ }
1000
+
1001
+ /* U+005C REVERSE SOLIDUS (\) */
1002
+ if (*data == 0x5C) {
1003
+ begin = data++;
1004
+
1005
+ if (data == end) {
1006
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1007
+ if (data >= end) {
1008
+ goto delim_rev_solidus;
1009
+ }
1010
+
1011
+ ch = *data;
1012
+
1013
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1014
+ goto ident;
1015
+ }
1016
+
1017
+ goto delim_rev_solidus;
1018
+ }
1019
+
1020
+ ch = *data;
1021
+
1022
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1023
+ goto ident;
1024
+ }
1025
+
1026
+ delim_rev_solidus:
1027
+
1028
+ delim = lxb_css_syntax_list_append_delim(tkz, begin, begin + 1, '\\');
1029
+ if (delim == NULL) {
1030
+ return NULL;
1031
+ }
1032
+ }
1033
+
1034
+ if (minuses_len == 2) {
1035
+ delim = lxb_css_syntax_list_append_delim(tkz, second, NULL, '-');
1036
+ if (delim == NULL) {
1037
+ return NULL;
1038
+ }
1039
+ }
1040
+
1041
+ delim:
1042
+
1043
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1044
+
1045
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1046
+ lxb_css_syntax_token_delim(token)->character = '-';
1047
+
1048
+ return data;
1049
+
1050
+ ident:
1051
+
1052
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);
1053
+
1054
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1055
+ if (data == NULL) {
1056
+ return NULL;
1057
+ }
1058
+
1059
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
1060
+ }
1061
+
1062
+ /*
1063
+ * U+002E FULL STOP (.)
1064
+ */
1065
+ const lxb_char_t *
1066
+ lxb_css_syntax_state_full_stop(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1067
+ const lxb_char_t *data, const lxb_char_t *end)
1068
+ {
1069
+ lxb_status_t status;
1070
+
1071
+ lxb_css_syntax_token_base(token)->begin = data;
1072
+ lxb_css_syntax_token_number(token)->have_sign = false;
1073
+
1074
+ data++;
1075
+
1076
+ if (data >= end) {
1077
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1078
+ if (data >= end) {
1079
+ goto delim;
1080
+ }
1081
+ }
1082
+
1083
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1084
+ if (*data >= 0x30 && *data <= 0x39) {
1085
+ return lxb_css_syntax_state_decimal(tkz, token, tkz->buffer,
1086
+ tkz->buffer + sizeof(tkz->buffer),
1087
+ data, end);
1088
+ }
1089
+
1090
+ delim:
1091
+
1092
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1093
+
1094
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1095
+ lxb_css_syntax_token_delim(token)->character = '.';
1096
+
1097
+ return data;
1098
+ }
1099
+
1100
+ /*
1101
+ * U+003A COLON (:)
1102
+ */
1103
+ const lxb_char_t *
1104
+ lxb_css_syntax_state_colon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1105
+ const lxb_char_t *data, const lxb_char_t *end)
1106
+ {
1107
+ token->type = LXB_CSS_SYNTAX_TOKEN_COLON;
1108
+
1109
+ lxb_css_syntax_token_base(token)->begin = data;
1110
+ lxb_css_syntax_token_base(token)->end = ++data;
1111
+
1112
+ return data;
1113
+ }
1114
+
1115
+ /*
1116
+ * U+003B SEMICOLON (;)
1117
+ */
1118
+ const lxb_char_t *
1119
+ lxb_css_syntax_state_semicolon(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1120
+ const lxb_char_t *data, const lxb_char_t *end)
1121
+ {
1122
+ token->type = LXB_CSS_SYNTAX_TOKEN_SEMICOLON;
1123
+
1124
+ lxb_css_syntax_token_base(token)->begin = data;
1125
+ lxb_css_syntax_token_base(token)->end = ++data;
1126
+
1127
+ return data;
1128
+ }
1129
+
1130
+ /*
1131
+ * U+003C LESS-THAN SIGN (<)
1132
+ */
1133
+ const lxb_char_t *
1134
+ lxb_css_syntax_state_less_sign(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1135
+ const lxb_char_t *data, const lxb_char_t *end)
1136
+ {
1137
+ lxb_char_t ch;
1138
+ lxb_status_t status;
1139
+ const lxb_char_t *mark, *minus, *esc;
1140
+ lxb_css_syntax_token_t *delim, *ident;
1141
+
1142
+ lxb_css_syntax_token_base(token)->begin = data++;
1143
+
1144
+ if ((end - data) > 2) {
1145
+ if (data[0] == '!' && data[1] == '-' && data[2] == '-') {
1146
+ data += 3;
1147
+
1148
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1149
+ lxb_css_syntax_token_base(token)->end = data;
1150
+
1151
+ return data;
1152
+ }
1153
+
1154
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1155
+
1156
+ lxb_css_syntax_token_base(token)->end = data;
1157
+ lxb_css_syntax_token_delim(token)->character = '<';
1158
+
1159
+ return data;
1160
+ }
1161
+
1162
+ if (data >= end) {
1163
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1164
+ if (data >= end) {
1165
+ goto delim;
1166
+ }
1167
+ }
1168
+
1169
+ /* U+0021 EXCLAMATION MARK */
1170
+ if (*data != 0x21) {
1171
+ goto delim;
1172
+ }
1173
+
1174
+ mark = ++data;
1175
+
1176
+ if (data == end) {
1177
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1178
+ if (data >= end) {
1179
+ goto delim_mark;
1180
+ }
1181
+ }
1182
+
1183
+ /* U+002D HYPHEN-MINUS */
1184
+ if (*data != 0x2D) {
1185
+ goto delim_mark;
1186
+ }
1187
+
1188
+ minus = ++data;
1189
+
1190
+ if (data == end) {
1191
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1192
+ if (data >= end) {
1193
+ goto delim_minus;
1194
+ }
1195
+ }
1196
+
1197
+ /* U+002D HYPHEN-MINUS */
1198
+ if (*data == 0x2D) {
1199
+ token->type = LXB_CSS_SYNTAX_TOKEN_CDO;
1200
+
1201
+ lxb_css_syntax_token_base(token)->end = ++data;
1202
+
1203
+ return data;
1204
+ }
1205
+
1206
+ if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
1207
+ goto ident;
1208
+ }
1209
+
1210
+ /* U+005C REVERSE SOLIDUS (\) */
1211
+ if (*data == 0x5C) {
1212
+ esc = data++;
1213
+
1214
+ if (data == end) {
1215
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1216
+ if (data >= end) {
1217
+ goto delim_esc;
1218
+ }
1219
+
1220
+ ch = *data;
1221
+
1222
+ if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
1223
+ ident = lxb_css_syntax_tokenizer_token_append(tkz);
1224
+ if (ident == NULL) {
1225
+ return NULL;
1226
+ }
1227
+
1228
+ lxb_css_syntax_token_base(ident)->begin = minus;
1229
+
1230
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
1231
+ (const lxb_char_t *) "-", 1);
1232
+
1233
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1234
+ if (data == NULL) {
1235
+ return NULL;
1236
+ }
1237
+
1238
+ data = lxb_css_syntax_state_ident_like_not_url(tkz, ident,
1239
+ data, end);
1240
+ if (data == NULL) {
1241
+ return NULL;
1242
+ }
1243
+
1244
+ goto delim_mark;
1245
+ }
1246
+
1247
+ delim_esc:
1248
+
1249
+ delim = lxb_css_syntax_list_append_delim(tkz, esc, esc + 1, '\\');
1250
+ if (delim == NULL) {
1251
+ return NULL;
1252
+ }
1253
+
1254
+ goto delim_minus;
1255
+ }
1256
+
1257
+ ch = *data--;
1258
+
1259
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1260
+ goto delim_minus;
1261
+ }
1262
+
1263
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1264
+ if (data == NULL) {
1265
+ return NULL;
1266
+ }
1267
+ }
1268
+ else if (*data != 0x00) {
1269
+ delim = lxb_css_syntax_list_append_delim(tkz, minus - 1, NULL, '-');
1270
+ if (delim == NULL) {
1271
+ return NULL;
1272
+ }
1273
+
1274
+ goto delim_mark;
1275
+ }
1276
+
1277
+ ident:
1278
+
1279
+ ident = lxb_css_syntax_tokenizer_token_append(tkz);
1280
+ if (ident == NULL) {
1281
+ return NULL;
1282
+ }
1283
+
1284
+ lxb_css_syntax_token_base(ident)->begin = minus;
1285
+
1286
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, (const lxb_char_t *) "-", 1);
1287
+
1288
+ data = lxb_css_syntax_state_ident_like_not_url(tkz, ident, data, end);
1289
+ if (data == NULL) {
1290
+ return NULL;
1291
+ }
1292
+
1293
+ goto delim_mark;
1294
+
1295
+ delim_minus:
1296
+
1297
+ delim = lxb_css_syntax_list_append_delim(tkz, minus - 1, minus, '-');
1298
+ if (delim == NULL) {
1299
+ return NULL;
1300
+ }
1301
+
1302
+ delim_mark:
1303
+
1304
+ delim = lxb_css_syntax_list_append_delim(tkz, mark - 1, mark, '!');
1305
+ if (delim == NULL) {
1306
+ return NULL;
1307
+ }
1308
+
1309
+ delim:
1310
+
1311
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1312
+
1313
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1314
+ lxb_css_syntax_token_delim(token)->character = '<';
1315
+
1316
+ return data;
1317
+ }
1318
+
1319
/*
 * U+0040 COMMERCIAL AT (@)
 *
 * Handles an '@' located at `data`.
 *
 * The token is optimistically typed as an at-keyword. If the bytes after
 * the '@' can begin an identifier, the remainder is handed over to
 * lxb_css_syntax_state_consume_ident(); this happens for:
 *   - a name-start byte;
 *   - '-' followed by a name-start byte, U+0000 or another '-';
 *   - an optional single '-' followed by '\' that is not followed by
 *     LF, FF or CR (the escape is decoded first);
 *   - an optional single '-' followed by U+0000.
 *
 * Otherwise the token is turned into a one-byte DELIM '@', and any '\' or
 * '-' that was already stepped over is queued as its own delim token with
 * lxb_css_syntax_list_append_delim().
 *
 * Returns the position at which tokenization continues, or NULL when a
 * helper (delim append, escape decoding) reports failure.
 *
 * NOTE(review): LXB_CSS_SYNTAX_NEXT_CHUNK and LXB_CSS_SYNTAX_STR_APPEND_LEN
 * are macros defined outside this view; they presumably refill data/end and
 * append to the tokenizer string buffer, possibly returning on error.
 * Confirm against their definitions.
 */
const lxb_char_t *
lxb_css_syntax_state_at(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
                        const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *minus, *esc;
    lxb_css_syntax_token_t *delim;

    /* Number of leading '-' bytes still pending for the name (0 or 1). */
    unsigned minuses_len = 0;
    static const lxb_char_t minuses[2] = "--";

    token->type = LXB_CSS_SYNTAX_TOKEN_AT_KEYWORD;

    /* The token starts at '@'; step past it. */
    lxb_css_syntax_token_base(token)->begin = data++;

    if (data >= end) {
        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
        if (data >= end) {
            /* Nothing follows the '@'. */
            goto delim;
        }
    }

    if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START) {
        return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
    }

    /* Remember where a possible '-' sits for the delim fallback. */
    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (*data == 0x2D) {
        data++;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ends right after "@-": emit '-' and '@' as delims. */
                delim = lxb_css_syntax_list_append_delim(tkz, minus,
                                                         minus + 1, '-');
                if (delim == NULL) {
                    return NULL;
                }

                goto delim;
            }
        }

        if (lxb_css_syntax_res_name_map[*data] == LXB_CSS_SYNTAX_RES_NAME_START
            || *data == 0x00)
        {
            /* "@-x": the name begins with a single '-'. */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 1);
            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }
        else if (*data == 0x2D) {
            /* "@--": both dashes belong to the name. */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, 2);
            return lxb_css_syntax_state_consume_ident(tkz, token, ++data, end);
        }

        /* One '-' seen; what follows decides whether it is part of a name. */
        minuses_len++;
    }

    /* U+005C REVERSE SOLIDUS (\) */
    if (*data == 0x5C) {
        /* esc points just past the backslash. */
        esc = ++data;

        if (data == end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_esc;
            }
        }

        ch = *data;

        /* A backslash followed by LF, FF or CR does not start an escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

            data = lxb_css_syntax_state_escaped(tkz, data, &end);
            if (data == NULL) {
                return NULL;
            }

            return lxb_css_syntax_state_consume_ident(tkz, token, data, end);
        }

        goto delim_esc;
    }
    else if (*data != 0x00) {
        /* Neither an escape nor U+0000: this is not an at-keyword. */
        goto delim_minus;
    }

    /* U+0000: consume_ident receives the byte together with any pending '-'. */
    LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, minuses, minuses_len);

    return lxb_css_syntax_state_consume_ident(tkz, token, data, end);

delim_esc:

    /* Queue the lone backslash as a delim token. */
    delim = lxb_css_syntax_list_append_delim(tkz, esc - 1, esc, '\\');
    if (delim == NULL) {
        return NULL;
    }

delim_minus:

    /* Queue the pending '-' (if any) as a delim token. */
    if (minuses_len != 0) {
        delim = lxb_css_syntax_list_append_delim(tkz, minus, NULL, '-');
        if (delim == NULL) {
            return NULL;
        }
    }

delim:

    /* The '@' itself becomes a one-byte delim token. */
    token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;

    lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
    lxb_css_syntax_token_delim(token)->character = '@';

    return data;
}
1441
+
1442
+ /*
1443
+ * U+005B LEFT SQUARE BRACKET ([)
1444
+ */
1445
+ const lxb_char_t *
1446
+ lxb_css_syntax_state_ls_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1447
+ const lxb_char_t *data, const lxb_char_t *end)
1448
+ {
1449
+ token->type = LXB_CSS_SYNTAX_TOKEN_LS_BRACKET;
1450
+
1451
+ lxb_css_syntax_token_base(token)->begin = data;
1452
+ lxb_css_syntax_token_base(token)->end = ++data;
1453
+
1454
+ return data;
1455
+ }
1456
+
1457
+ /*
1458
+ * U+005C REVERSE SOLIDUS (\)
1459
+ */
1460
+ const lxb_char_t *
1461
+ lxb_css_syntax_state_rsolidus(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1462
+ const lxb_char_t *data, const lxb_char_t *end)
1463
+ {
1464
+ lxb_char_t ch;
1465
+ lxb_status_t status;
1466
+
1467
+ lxb_css_syntax_token_base(token)->begin = data++;
1468
+
1469
+ if (data >= end) {
1470
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1471
+ if (data >= end) {
1472
+ goto delim;
1473
+ }
1474
+ }
1475
+
1476
+ ch = *data;
1477
+
1478
+ if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
1479
+ goto delim;
1480
+ }
1481
+
1482
+ data = lxb_css_syntax_state_escaped(tkz, data, &end);
1483
+ if (data == NULL) {
1484
+ return NULL;
1485
+ }
1486
+
1487
+ return lxb_css_syntax_state_ident_like(tkz, token, data, end);
1488
+
1489
+ delim:
1490
+
1491
+ token->type = LXB_CSS_SYNTAX_TOKEN_DELIM;
1492
+
1493
+ lxb_css_syntax_token_base(token)->end = lxb_css_syntax_token_base(token)->begin + 1;
1494
+ lxb_css_syntax_token_delim(token)->character = '\\';
1495
+
1496
+ return data;
1497
+ }
1498
+
1499
+ /*
1500
+ * U+005D RIGHT SQUARE BRACKET (])
1501
+ */
1502
+ const lxb_char_t *
1503
+ lxb_css_syntax_state_rs_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1504
+ const lxb_char_t *data, const lxb_char_t *end)
1505
+ {
1506
+ token->type = LXB_CSS_SYNTAX_TOKEN_RS_BRACKET;
1507
+
1508
+ lxb_css_syntax_token_base(token)->begin = data;
1509
+ lxb_css_syntax_token_base(token)->end = ++data;
1510
+
1511
+ return data;
1512
+ }
1513
+
1514
+ /*
1515
+ * U+007B LEFT CURLY BRACKET ({)
1516
+ */
1517
+ const lxb_char_t *
1518
+ lxb_css_syntax_state_lc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1519
+ const lxb_char_t *data, const lxb_char_t *end)
1520
+ {
1521
+ token->type = LXB_CSS_SYNTAX_TOKEN_LC_BRACKET;
1522
+
1523
+ lxb_css_syntax_token_base(token)->begin = data;
1524
+ lxb_css_syntax_token_base(token)->end = ++data;
1525
+
1526
+ return data;
1527
+ }
1528
+
1529
+ /*
1530
+ * U+007D RIGHT CURLY BRACKET (})
1531
+ */
1532
+ const lxb_char_t *
1533
+ lxb_css_syntax_state_rc_bracket(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
1534
+ const lxb_char_t *data, const lxb_char_t *end)
1535
+ {
1536
+ token->type = LXB_CSS_SYNTAX_TOKEN_RC_BRACKET;
1537
+
1538
+ lxb_css_syntax_token_base(token)->begin = data;
1539
+ lxb_css_syntax_token_base(token)->end = ++data;
1540
+
1541
+ return data;
1542
+ }
1543
+
1544
+ /*
1545
+ * Numeric
1546
+ */
1547
+ lxb_inline void
1548
+ lxb_css_syntax_consume_numeric_set_int(lxb_css_syntax_tokenizer_t *tkz,
1549
+ lxb_css_syntax_token_t *token,
1550
+ const lxb_char_t *start, const lxb_char_t *end)
1551
+ {
1552
+ double num = lexbor_strtod_internal(start, (end - start), 0);
1553
+
1554
+ token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1555
+
1556
+ lxb_css_syntax_token_number(token)->is_float = false;
1557
+ lxb_css_syntax_token_number(token)->num = num;
1558
+ }
1559
+
1560
+ lxb_inline void
1561
+ lxb_css_syntax_consume_numeric_set_float(lxb_css_syntax_tokenizer_t *tkz,
1562
+ lxb_css_syntax_token_t *token,
1563
+ const lxb_char_t *start, const lxb_char_t *end,
1564
+ bool e_is_negative, int exponent, int e_digit)
1565
+ {
1566
+ if (e_is_negative) {
1567
+ exponent -= e_digit;
1568
+ }
1569
+ else {
1570
+ exponent += e_digit;
1571
+ }
1572
+
1573
+ double num = lexbor_strtod_internal(start, (end - start), exponent);
1574
+
1575
+ token->type = LXB_CSS_SYNTAX_TOKEN_NUMBER;
1576
+
1577
+ lxb_css_syntax_token_number(token)->num = num;
1578
+ lxb_css_syntax_token_number(token)->is_float = true;
1579
+ }
1580
+
1581
+ const lxb_char_t *
1582
+ lxb_css_syntax_state_consume_before_numeric(lxb_css_syntax_tokenizer_t *tkz,
1583
+ lxb_css_syntax_token_t *token,
1584
+ const lxb_char_t *data,
1585
+ const lxb_char_t *end)
1586
+ {
1587
+ lxb_css_syntax_token_base(token)->begin = data;
1588
+ lxb_css_syntax_token_number(token)->have_sign = false;
1589
+
1590
+ return lxb_css_syntax_state_consume_numeric(tkz, token, data, end);
1591
+ }
1592
+
1593
+ static const lxb_char_t *
1594
+ lxb_css_syntax_state_consume_numeric(lxb_css_syntax_tokenizer_t *tkz,
1595
+ lxb_css_syntax_token_t *token,
1596
+ const lxb_char_t *data,
1597
+ const lxb_char_t *end)
1598
+ {
1599
+ lxb_status_t status;
1600
+ lxb_css_syntax_token_t *delim;
1601
+
1602
+ lxb_char_t *buf_start = tkz->buffer;
1603
+ lxb_char_t *buf_end = buf_start + sizeof(tkz->buffer);
1604
+
1605
+ do {
1606
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1607
+ if (*data < 0x30 || *data > 0x39) {
1608
+ break;
1609
+ }
1610
+
1611
+ if (buf_start != buf_end) {
1612
+ *buf_start++ = *data;
1613
+ }
1614
+
1615
+ if (++data == end) {
1616
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1617
+ if (data >= end) {
1618
+ lxb_css_syntax_token_base(token)->end = data;
1619
+
1620
+ lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
1621
+ buf_start);
1622
+ return data;
1623
+ }
1624
+ }
1625
+ }
1626
+ while (true);
1627
+
1628
+ lxb_css_syntax_token_base(token)->end = data;
1629
+
1630
+ /* U+002E FULL STOP (.) */
1631
+ if (*data != 0x2E) {
1632
+ lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer,
1633
+ buf_start);
1634
+
1635
+ return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1636
+ data, end);
1637
+ }
1638
+
1639
+ data++;
1640
+
1641
+ if (data == end) {
1642
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1643
+ if (data >= end) {
1644
+ goto delim;
1645
+ }
1646
+ }
1647
+
1648
+ if (*data >= 0x30 && *data <= 0x39) {
1649
+ return lxb_css_syntax_state_decimal(tkz, token, buf_start, buf_end,
1650
+ data, end);
1651
+ }
1652
+
1653
+ delim:
1654
+
1655
+ lxb_css_syntax_consume_numeric_set_int(tkz, token, tkz->buffer, buf_start);
1656
+
1657
+ delim = lxb_css_syntax_list_append_delim(tkz, data - 1, data, '.');
1658
+ if (delim == NULL) {
1659
+ return NULL;
1660
+ }
1661
+
1662
+ return data;
1663
+ }
1664
+
1665
+ static const lxb_char_t *
1666
+ lxb_css_syntax_state_decimal(lxb_css_syntax_tokenizer_t *tkz,
1667
+ lxb_css_syntax_token_t *token,
1668
+ lxb_char_t *buf_start, lxb_char_t *buf_end,
1669
+ const lxb_char_t *data, const lxb_char_t *end)
1670
+ {
1671
+ bool e_is_negative;
1672
+ int exponent, e_digit;
1673
+ lxb_char_t ch, by;
1674
+ lxb_status_t status;
1675
+ const lxb_char_t *last;
1676
+ lxb_css_syntax_token_t *delim, *t_str;
1677
+ lxb_css_syntax_token_string_t *str;
1678
+
1679
+ exponent = 0;
1680
+
1681
+ str = lxb_css_syntax_token_dimension_string(token);
1682
+ t_str = (lxb_css_syntax_token_t *) (void *) str;
1683
+
1684
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1685
+ do {
1686
+ if (buf_start != buf_end) {
1687
+ *buf_start++ = *data;
1688
+ exponent -= 1;
1689
+ }
1690
+
1691
+ data++;
1692
+
1693
+ if (data >= end) {
1694
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1695
+ if (data >= end) {
1696
+ lxb_css_syntax_token_base(token)->end = data;
1697
+
1698
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1699
+ buf_start, 0, exponent, 0);
1700
+ return data;
1701
+ }
1702
+ }
1703
+ }
1704
+ while (*data >= 0x30 && *data <= 0x39);
1705
+
1706
+ lxb_css_syntax_token_base(token)->end = data;
1707
+ lxb_css_syntax_token_base(str)->begin = data;
1708
+
1709
+ ch = *data;
1710
+
1711
+ /* U+0045 Latin Capital Letter (E) or U+0065 Latin Small Letter (e) */
1712
+ if (ch != 0x45 && ch != 0x65) {
1713
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1714
+ buf_start, 0, exponent, 0);
1715
+
1716
+ return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1717
+ data, end);
1718
+ }
1719
+
1720
+ e_digit = 0;
1721
+ e_is_negative = false;
1722
+
1723
+ lxb_css_syntax_token_base(str)->end = ++data;
1724
+
1725
+ if (data == end) {
1726
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1727
+ if (data >= end) {
1728
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer,
1729
+ buf_start, 0, exponent, 0);
1730
+
1731
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1732
+
1733
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1734
+
1735
+ return lxb_css_syntax_state_dimension_set(tkz, token, data);
1736
+ }
1737
+ }
1738
+
1739
+ switch (*data) {
1740
+ /* U+002D HYPHEN-MINUS (-) */
1741
+ case 0x2D:
1742
+ e_is_negative = true;
1743
+ /* fall through */
1744
+
1745
+ /* U+002B PLUS SIGN (+) */
1746
+ case 0x2B:
1747
+ last = data++;
1748
+ by = *last;
1749
+
1750
+ if (data == end) {
1751
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1752
+ if (data >= end) {
1753
+ goto dimension;
1754
+ }
1755
+ }
1756
+
1757
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1758
+ if (*data < 0x30 || *data > 0x39) {
1759
+ goto dimension;
1760
+ }
1761
+
1762
+ break;
1763
+
1764
+ default:
1765
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1766
+ if (*data < 0x30 || *data > 0x39) {
1767
+ lxb_css_syntax_consume_numeric_set_float(tkz, token,
1768
+ tkz->buffer, buf_start,
1769
+ 0, exponent, 0);
1770
+
1771
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1772
+
1773
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1774
+
1775
+ return lxb_css_syntax_state_consume_ident(tkz, t_str,
1776
+ data, end);
1777
+ }
1778
+
1779
+ break;
1780
+ }
1781
+
1782
+ /* U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) */
1783
+ do {
1784
+ e_digit = (*data - 0x30) + e_digit * 0x0A;
1785
+
1786
+ if (++data == end) {
1787
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
1788
+ if (data >= end) {
1789
+ lxb_css_syntax_token_base(token)->end = data;
1790
+
1791
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1792
+ e_is_negative, exponent, e_digit);
1793
+ return data;
1794
+ }
1795
+ }
1796
+ }
1797
+ while(*data >= 0x30 && *data <= 0x39);
1798
+
1799
+ lxb_css_syntax_consume_numeric_set_float(tkz, token, tkz->buffer, buf_start,
1800
+ e_is_negative, exponent, e_digit);
1801
+
1802
+ return lxb_css_syntax_state_consume_numeric_name_start(tkz, token,
1803
+ data, end);
1804
+
1805
+ dimension:
1806
+
1807
+ lxb_css_syntax_consume_numeric_set_float(tkz, token,
1808
+ tkz->buffer, buf_start,
1809
+ 0, exponent, 0);
1810
+
1811
+ token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;
1812
+
1813
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &ch, 1);
1814
+
1815
+ if (by == '-') {
1816
+ LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status, &by, 1);
1817
+
1818
+ return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
1819
+ }
1820
+
1821
+ delim = lxb_css_syntax_list_append_delim(tkz, last, NULL, '+');
1822
+ if (delim == NULL) {
1823
+ return NULL;
1824
+ }
1825
+
1826
+ return lxb_css_syntax_state_dimension_set(tkz, token, data);
1827
+ }
1828
+
1829
/*
 * Decides what follows an already-parsed number.
 *
 * `token` holds the number; its dimension string part (`str` / `t_str`) is
 * used as the target when a unit name follows. Outcomes:
 *   - name-start byte or U+0000         -> DIMENSION, unit via consume_ident();
 *   - '%'                               -> PERCENTAGE;
 *   - '-' + name-start / '-' / U+0000   -> DIMENSION whose unit begins with '-';
 *   - [-] '\' + valid escape            -> DIMENSION, unit begins with the
 *                                          decoded escape;
 *   - [-] '\' + LF/FF/CR or end of data -> number stays as is; '\' and then
 *                                          '-' (if seen) are queued as delims;
 *   - anything else                     -> number stays as is; a seen '-' is
 *                                          queued as a delim.
 *
 * Returns the position to continue from, or NULL when a helper fails.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_numeric_name_start(lxb_css_syntax_tokenizer_t *tkz,
                                                lxb_css_syntax_token_t *token,
                                                const lxb_char_t *data,
                                                const lxb_char_t *end)
{
    bool have_minus;
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *esc, *minus;
    lxb_css_syntax_token_t *delim, *t_str;
    lxb_css_syntax_token_string_t *str;

    /* View the dimension's string part as a token so consume_ident() can fill it. */
    str = lxb_css_syntax_token_dimension_string(token);
    t_str = (lxb_css_syntax_token_t *) (void *) str;

    lxb_css_syntax_token_base(t_str)->begin = data;

    ch = *data;

    if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
        || ch == 0x00)
    {
        token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

        return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
    }

    /* U+0025 PERCENTAGE SIGN (%) */
    if (ch == 0x25) {
        token->type = LXB_CSS_SYNTAX_TOKEN_PERCENTAGE;

        lxb_css_syntax_token_base(token)->end = ++data;

        return data;
    }

    have_minus = false;
    /* Position of a possible '-', needed for the delim fallbacks below. */
    minus = data;

    /* U+002D HYPHEN-MINUS */
    if (ch == 0x2D) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ends after the '-': queue it as a delim. */
                delim = lxb_css_syntax_list_append_delim(tkz, data - 1,
                                                         data, '-');
                if (delim == NULL) {
                    return NULL;
                }

                return data;
            }
        }

        ch = *data;

        if (lxb_css_syntax_res_name_map[ch] == LXB_CSS_SYNTAX_RES_NAME_START
            || ch == 0x2D || ch == 0x00)
        {
            token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

            /* The unit name starts with the '-' just skipped. */
            LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                          (const lxb_char_t *) "-", 1);

            return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
        }

        have_minus = true;
    }

    /* Position of a possible backslash. */
    esc = data;

    /* U+005C REVERSE SOLIDUS (\) */
    if (ch == 0x5C) {
        data++;

        if (data >= end) {
            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                goto delim_rev_solidus;
            }
        }

        ch = *data;

        /* A backslash followed by LF, FF or CR is not an escape. */
        if (ch != 0x0A && ch != 0x0C && ch != 0x0D) {
            token->type = LXB_CSS_SYNTAX_TOKEN_DIMENSION;

            if (have_minus) {
                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              (const lxb_char_t *) "-", 1);
            }

            data = lxb_css_syntax_state_escaped(tkz, data, &end);
            if (data == NULL) {
                return NULL;
            }

            return lxb_css_syntax_state_consume_ident(tkz, t_str, data, end);
        }

    delim_rev_solidus:

        /* Invalid escape: queue '\' and then the pending '-' as delims. */
        delim = lxb_css_syntax_list_append_delim(tkz, esc, esc + 1, '\\');
        if (delim == NULL) {
            return NULL;
        }

        if (have_minus) {
            delim = lxb_css_syntax_list_append_delim(tkz, minus,
                                                     minus + 1, '-');
            if (delim == NULL) {
                return NULL;
            }
        }

        return data;
    }

    /* No unit follows: the number token ends where the suffix check began. */
    lxb_css_syntax_token_base(token)->end = minus;

    if (have_minus) {
        delim = lxb_css_syntax_list_append_delim(tkz, minus, NULL, '-');
        if (delim == NULL) {
            return NULL;
        }
    }

    return data;
}
1962
+
1963
/*
 * Consumes the remainder of an identifier into the tokenizer string buffer.
 *
 * Bytes are scanned starting at `data`; [begin, data) is the run of plain
 * name bytes not yet appended. The scan stops at:
 *   - end of input: pending bytes are flushed and the string is finalized;
 *   - a byte whose lxb_css_syntax_res_name_map entry is 0x00 and that is
 *     neither '\' nor U+0000: pending bytes are flushed and the string is
 *     finalized, with `data` left on that byte;
 *   - a '\' followed by LF/FF/CR or end of input: the name ends before the
 *     backslash, which is queued as a delim token.
 * A valid escape is decoded through lxb_css_syntax_state_escaped(); U+0000
 * is replaced by lexbor_str_res_ansi_replacement_character.
 *
 * Returns the result of lxb_css_syntax_state_string_set(), or NULL when a
 * helper fails.
 *
 * NOTE(review): LXB_CSS_SYNTAX_STR_APPEND / _APPEND_LEN / NEXT_CHUNK are
 * macros defined outside this view and may return from this function on
 * error - confirm.
 */
static const lxb_char_t *
lxb_css_syntax_state_consume_ident(lxb_css_syntax_tokenizer_t *tkz,
                                   lxb_css_syntax_token_t *token,
                                   const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_status_t status;
    const lxb_char_t *begin, *last;
    lxb_css_syntax_token_t *delim;

    begin = data;

    for (;; data++) {
        if (data >= end) {
            /* Flush what was scanned before asking for more input. */
            if (begin < data) {
                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            /* Keep the old position: it is the token end if no input follows. */
            last = data;

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                lxb_css_syntax_token_base(token)->end = last;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }

            begin = data;
        }

        if (lxb_css_syntax_res_name_map[*data] == 0x00) {

            /* U+005C REVERSE SOLIDUS (\) */
            if (*data == 0x5C) {
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* begin marks the backslash, last the byte after it. */
                begin = data;
                last = ++data;

                if (data == end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        goto push_delim_last;
                    }
                }

                if (*data == 0x0A || *data == 0x0C || *data == 0x0D) {
                    goto push_delim_last;
                }

                data = lxb_css_syntax_state_escaped(tkz, data, &end);
                if (data == NULL) {
                    return NULL;
                }

                /*
                 * Resume right after the escape; the decrement cancels the
                 * data++ of the for loop.
                 */
                begin = data--;
            }
            else if (*data == 0x00) {
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* U+0000 is stored as the replacement character. */
                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              lexbor_str_res_ansi_replacement_character,
                                              sizeof(lexbor_str_res_ansi_replacement_character) - 1);
                begin = data + 1;
            }
            else {
                /* First byte that cannot be part of a name: the name ends here. */
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->end = data;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }
        }
    }

    /* Not reached: the loop above only exits through return or goto. */
    return data;

push_delim_last:

    /* The name ends before the backslash; the backslash becomes a delim. */
    lxb_css_syntax_token_base(token)->end = begin;

    delim = lxb_css_syntax_list_append_delim(tkz, begin, last, '\\');
    if (delim == NULL) {
        return NULL;
    }

    return lxb_css_syntax_state_string_set(tkz, token, data);
}
2056
+
2057
+ const lxb_char_t *
2058
+ lxb_css_syntax_state_ident_like_begin(lxb_css_syntax_tokenizer_t *tkz,
2059
+ lxb_css_syntax_token_t *token,
2060
+ const lxb_char_t *data, const lxb_char_t *end)
2061
+ {
2062
+ lxb_css_syntax_token_base(token)->begin = data;
2063
+
2064
+ return lxb_css_syntax_state_ident_like(tkz, token, data, end);
2065
+ }
2066
+
2067
+ const lxb_char_t *
2068
+ lxb_css_syntax_state_ident_like(lxb_css_syntax_tokenizer_t *tkz,
2069
+ lxb_css_syntax_token_t *token,
2070
+ const lxb_char_t *data, const lxb_char_t *end)
2071
+ {
2072
+ lxb_char_t ch;
2073
+ lxb_status_t status;
2074
+ const lxb_char_t *begin, *ws_begin;
2075
+ lxb_css_syntax_token_t *ws;
2076
+ lxb_css_syntax_token_string_t *str, *ws_str;
2077
+ static const lxb_char_t url[] = "url";
2078
+
2079
+ data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2080
+
2081
+ if (data >= end) {
2082
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2083
+ if (data >= end) {
2084
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2085
+ return data;
2086
+ }
2087
+ }
2088
+
2089
+ if (data < end && *data == '(') {
2090
+ lxb_css_syntax_token_base(token)->end = ++data;
2091
+
2092
+ str = lxb_css_syntax_token_string(token);
2093
+
2094
+ if (str->length == 3 && lexbor_str_data_casecmp(str->data, url)) {
2095
+ begin = data;
2096
+
2097
+ tkz->pos += str->length + 1;
2098
+ ws_begin = tkz->pos;
2099
+
2100
+ do {
2101
+ if (data >= end) {
2102
+ if (begin < data) {
2103
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2104
+ }
2105
+
2106
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2107
+ if (data >= end) {
2108
+ begin = data;
2109
+ goto with_ws;
2110
+ }
2111
+
2112
+ begin = data;
2113
+ }
2114
+
2115
+ ch = *data;
2116
+
2117
+ if (lexbor_utils_whitespace(ch, !=, &&)) {
2118
+ /* U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE (') */
2119
+ if (ch == 0x22 || ch == 0x27) {
2120
+ goto with_ws;
2121
+ }
2122
+
2123
+ tkz->pos = tkz->start;
2124
+
2125
+ return lxb_css_syntax_state_url(tkz, token, data, end);
2126
+ }
2127
+
2128
+ data++;
2129
+ }
2130
+ while (true);
2131
+ }
2132
+
2133
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2134
+
2135
+ return data;
2136
+ }
2137
+
2138
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2139
+
2140
+ return data;
2141
+
2142
+ with_ws:
2143
+
2144
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2145
+
2146
+ if (ws_begin != tkz->pos || begin < data) {
2147
+ if (begin < data) {
2148
+ LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
2149
+ }
2150
+
2151
+ if (tkz->pos >= tkz->end) {
2152
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2153
+ return NULL;
2154
+ }
2155
+ }
2156
+
2157
+ str->data = tkz->start;
2158
+ *tkz->pos = 0x00;
2159
+
2160
+ ws = lxb_css_syntax_tokenizer_token_append(tkz);
2161
+ if (ws == NULL) {
2162
+ return NULL;
2163
+ }
2164
+
2165
+ ws->type = LXB_CSS_SYNTAX_TOKEN_WHITESPACE;
2166
+
2167
+ lxb_css_syntax_token_base(ws)->begin = begin;
2168
+ lxb_css_syntax_token_base(ws)->end = data;
2169
+
2170
+ ws_str = lxb_css_syntax_token_string(ws);
2171
+
2172
+ ws_str->data = tkz->start + str->length + 1;
2173
+ ws_str->length = tkz->pos - ws_str->data;
2174
+ }
2175
+
2176
+ tkz->pos = tkz->start;
2177
+
2178
+ return data;
2179
+ }
2180
+
2181
+ const lxb_char_t *
2182
+ lxb_css_syntax_state_ident_like_not_url_begin(lxb_css_syntax_tokenizer_t *tkz,
2183
+ lxb_css_syntax_token_t *token,
2184
+ const lxb_char_t *data, const lxb_char_t *end)
2185
+ {
2186
+ lxb_css_syntax_token_base(token)->begin = data;
2187
+
2188
+ return lxb_css_syntax_state_ident_like_not_url(tkz, token, data, end);
2189
+ }
2190
+
2191
+ const lxb_char_t *
2192
+ lxb_css_syntax_state_ident_like_not_url(lxb_css_syntax_tokenizer_t *tkz,
2193
+ lxb_css_syntax_token_t *token,
2194
+ const lxb_char_t *data, const lxb_char_t *end)
2195
+ {
2196
+ data = lxb_css_syntax_state_consume_ident(tkz, token, data, end);
2197
+ if (data == NULL) {
2198
+ return NULL;
2199
+ }
2200
+
2201
+ if (data < end && *data == '(') {
2202
+ token->type = LXB_CSS_SYNTAX_TOKEN_FUNCTION;
2203
+
2204
+ lxb_css_syntax_token_base(token)->end = ++data;
2205
+
2206
+ return data;
2207
+ }
2208
+
2209
+ token->type = LXB_CSS_SYNTAX_TOKEN_IDENT;
2210
+
2211
+ return data;
2212
+ }
2213
+
2214
/*
 * URL
 *
 * Consumes the body of an unquoted url( ... ) starting at `data`.
 * [begin, data) is the run of plain bytes not yet appended to the tokenizer
 * string buffer. Outcomes:
 *   - ')'                                   -> URL token;
 *   - end of input                          -> URL token plus an EOINUR
 *                                              parse error;
 *   - '"', '\'', '(', U+000B, U+007F or another
 *     control byte (see default case)       -> QOINUR parse error, rest is
 *                                              consumed as a bad URL;
 *   - '\' followed by LF/FF/CR              -> WRESINUR parse error, bad URL;
 *   - '\' at end of input                   -> WRESINUR parse error, BAD_URL;
 *   - whitespace followed (after more whitespace) by ')'
 *                                           -> URL token;
 *   - whitespace followed by anything else  -> bad URL;
 *   - whitespace up to end of input         -> EOINUR parse error, BAD_URL.
 * U+0000 is replaced by lexbor_str_res_ansi_replacement_character and valid
 * escapes are decoded by lxb_css_syntax_state_escaped().
 *
 * Returns the position to continue from, or NULL when a helper fails.
 */
static const lxb_char_t *
lxb_css_syntax_state_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
                         const lxb_char_t *data, const lxb_char_t *end)
{
    lxb_char_t ch;
    lxb_status_t status;
    const lxb_char_t *begin;

    status = LXB_STATUS_OK;

    /* Start from an empty, zero-terminated string at the current position. */
    *tkz->pos = 0x00;

    begin = data;

    do {
        if (data >= end) {
            if (begin < data) {
                LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
            }

            LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
            if (data >= end) {
                /* Input ended inside the URL: report it, still emit a URL. */
                lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                   LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);

                token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                lxb_css_syntax_token_base(token)->end = data;

                return lxb_css_syntax_state_string_set(tkz, token, data);
            }

            begin = data;
        }

        switch (*data) {
            /* U+0000 NULL (\0) */
            case 0x00:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                LXB_CSS_SYNTAX_STR_APPEND_LEN(tkz, status,
                                              lexbor_str_res_ansi_replacement_character,
                                              sizeof(lexbor_str_res_ansi_replacement_character) - 1);
                begin = data + 1;
                break;

            /* U+0029 RIGHT PARENTHESIS ()) */
            case 0x29:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                lxb_css_syntax_token_base(token)->end = ++data;

                return lxb_css_syntax_state_string_set(tkz, token, data);

            /*
             * U+0022 QUOTATION MARK (")
             * U+0027 APOSTROPHE (')
             * U+0028 LEFT PARENTHESIS (()
             * U+000B LINE TABULATION
             * U+007F DELETE
             */
            case 0x22:
            case 0x27:
            case 0x28:
            case 0x0B:
            case 0x7F:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                   LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);

                return lxb_css_syntax_state_bad_url(tkz, token, data + 1, end);

            /* U+005C REVERSE SOLIDUS (\) */
            case 0x5C:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                /* Step past the backslash. */
                begin = ++data;

                if (data == end) {
                    LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                    if (data >= end) {
                        lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                           LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);

                        token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;

                        lxb_css_syntax_token_base(token)->end = begin;

                        return lxb_css_syntax_state_string_set(tkz, token, data);
                    }
                }

                ch = *data;

                /* A backslash followed by LF, FF or CR is not a valid escape. */
                if (ch == 0x0A || ch == 0x0C || ch == 0x0D) {
                    lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                       LXB_CSS_SYNTAX_TOKENIZER_ERROR_WRESINUR);

                    lxb_css_syntax_token_base(token)->end = data;

                    return lxb_css_syntax_state_bad_url(tkz, token, data, end);
                }

                data = lxb_css_syntax_state_escaped(tkz, data, &end);
                if (data == NULL) {
                    return NULL;
                }

                /*
                 * Resume right after the escape; the decrement cancels the
                 * data++ at the bottom of the loop.
                 */
                begin = data--;

                break;

            /*
             * U+0009 CHARACTER TABULATION (tab)
             * U+000A LINE FEED (LF)
             * U+000C FORM FEED (FF)
             * U+000D CARRIAGE RETURN (CR)
             * U+0020 SPACE
             */
            case 0x09:
            case 0x0A:
            case 0x0C:
            case 0x0D:
            case 0x20:
                if (begin < data) {
                    LXB_CSS_SYNTAX_STR_APPEND(tkz, status, begin, data);
                }

                lxb_css_syntax_token_base(token)->end = data;

                begin = ++data;

                /* Only whitespace and then ')' may follow. */
                do {
                    if (data == end) {
                        LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
                        if (data >= end) {
                            lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                               LXB_CSS_SYNTAX_TOKENIZER_ERROR_EOINUR);

                            token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;

                            lxb_css_syntax_token_base(token)->end = begin;

                            return lxb_css_syntax_state_string_set(tkz, token, data);
                        }
                    }

                    ch = *data;

                    if (lexbor_utils_whitespace(ch, !=, &&)) {
                        /* U+0029 RIGHT PARENTHESIS ()) */
                        if (*data == 0x29) {
                            token->type = LXB_CSS_SYNTAX_TOKEN_URL;

                            lxb_css_syntax_token_base(token)->end = ++data;

                            return lxb_css_syntax_state_string_set(tkz, token, data);
                        }

                        return lxb_css_syntax_state_bad_url(tkz, token,
                                                            data, end);
                    }

                    data++;
                }
                while (true);

            default:
                /*
                 * Inclusive ranges:
                 *     U+0000 NULL through U+0008 BACKSPACE, or
                 *     U+000E SHIFT OUT through U+001F INFORMATION SEPARATOR ONE
                 * (U+0000 itself is already handled by its own case above.)
                 */
                if ((*data >= 0x00 && *data <= 0x08)
                    || (*data >= 0x0E && *data <= 0x1F))
                {
                    lxb_css_syntax_tokenizer_error_add(tkz->parse_errors, data,
                                                       LXB_CSS_SYNTAX_TOKENIZER_ERROR_QOINUR);

                    return lxb_css_syntax_state_bad_url(tkz, token,
                                                        data + 1, end);
                }

                break;
        }

        data++;
    }
    while (true);

    /* Not reached: the loop above only exits through return. */
    return data;
}
2420
+
2421
+ /*
2422
+ * Bad URL
2423
+ */
2424
+ static const lxb_char_t *
2425
+ lxb_css_syntax_state_bad_url(lxb_css_syntax_tokenizer_t *tkz, lxb_css_syntax_token_t *token,
2426
+ const lxb_char_t *data, const lxb_char_t *end)
2427
+ {
2428
+ lxb_status_t status;
2429
+
2430
+ token->type = LXB_CSS_SYNTAX_TOKEN_BAD_URL;
2431
+
2432
+ if(lxb_css_syntax_state_string_set(tkz, token, data) == NULL) {
2433
+ return NULL;
2434
+ }
2435
+
2436
+ do {
2437
+ if (data >= end) {
2438
+ LXB_CSS_SYNTAX_NEXT_CHUNK(tkz, status, data, end);
2439
+ if (data >= end) {
2440
+ lxb_css_syntax_token_base(token)->end = data;
2441
+ return data;
2442
+ }
2443
+ }
2444
+
2445
+ /* U+0029 RIGHT PARENTHESIS ()) */
2446
+ if (*data == 0x29) {
2447
+ lxb_css_syntax_token_base(token)->end = ++data;
2448
+ return data;
2449
+ }
2450
+ /* U+005C REVERSE SOLIDUS (\) */
2451
+ else if (*data == 0x5C) {
2452
+ data++;
2453
+ }
2454
+
2455
+ data++;
2456
+ }
2457
+ while (true);
2458
+
2459
+ return data;
2460
+ }
2461
+
2462
+ lxb_inline lxb_status_t
2463
+ lxb_css_syntax_string_append_rep(lxb_css_syntax_tokenizer_t *tkz)
2464
+ {
2465
+ return lxb_css_syntax_string_append(tkz, lexbor_str_res_ansi_replacement_character,
2466
+ sizeof(lexbor_str_res_ansi_replacement_character) - 1);
2467
+ }
2468
+
2469
+ static const lxb_char_t *
2470
+ lxb_css_syntax_state_escaped(lxb_css_syntax_tokenizer_t *tkz,
2471
+ const lxb_char_t *data, const lxb_char_t **end)
2472
+ {
2473
+ uint32_t cp;
2474
+ unsigned count;
2475
+ lxb_status_t status;
2476
+
2477
+ cp = 0;
2478
+
2479
+ for (count = 0; count < 6; count++, data++) {
2480
+ if (data >= *end) {
2481
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2482
+ if (status != LXB_STATUS_OK) {
2483
+ return NULL;
2484
+ }
2485
+
2486
+ if (data >= *end) {
2487
+ if (count == 0) {
2488
+ return *end;
2489
+ }
2490
+
2491
+ break;
2492
+ }
2493
+ }
2494
+
2495
+ if (lexbor_str_res_map_hex[*data] == 0xFF) {
2496
+ if (count == 0) {
2497
+ if (*data == 0x00) {
2498
+ status = lxb_css_syntax_string_append_rep(tkz);
2499
+ if (status != LXB_STATUS_OK) {
2500
+ return NULL;
2501
+ }
2502
+
2503
+ return data + 1;
2504
+ }
2505
+
2506
+ status = lxb_css_syntax_string_append(tkz, data, 1);
2507
+ if (status != LXB_STATUS_OK) {
2508
+ return NULL;
2509
+ }
2510
+
2511
+ return data + 1;
2512
+ }
2513
+
2514
+ switch (*data) {
2515
+ case 0x0D:
2516
+ data++;
2517
+
2518
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data,
2519
+ end);
2520
+ if (status != LXB_STATUS_OK) {
2521
+ return NULL;
2522
+ }
2523
+
2524
+ if (data >= *end) {
2525
+ break;
2526
+ }
2527
+
2528
+ if (*data == 0x0A) {
2529
+ data++;
2530
+ }
2531
+
2532
+ break;
2533
+
2534
+ case 0x09:
2535
+ case 0x20:
2536
+ case 0x0A:
2537
+ case 0x0C:
2538
+ data++;
2539
+ break;
2540
+ }
2541
+
2542
+ break;
2543
+ }
2544
+
2545
+ cp <<= 4;
2546
+ cp |= lexbor_str_res_map_hex[*data];
2547
+ }
2548
+
2549
+ if ((tkz->end - tkz->pos) < 5) {
2550
+ if (lxb_css_syntax_string_realloc(tkz, 1024) != LXB_STATUS_OK) {
2551
+ return NULL;
2552
+ }
2553
+ }
2554
+
2555
+ lxb_css_syntax_codepoint_to_ascii(tkz, cp);
2556
+
2557
+ return data;
2558
+ }
2559
+
2560
+ static const lxb_char_t *
2561
+ lxb_css_syntax_state_escaped_string(lxb_css_syntax_tokenizer_t *tkz,
2562
+ const lxb_char_t *data, const lxb_char_t **end)
2563
+ {
2564
+ lxb_status_t status;
2565
+
2566
+ /* U+000D CARRIAGE RETURN */
2567
+ if (*data == 0x0D) {
2568
+ data++;
2569
+
2570
+ if (data >= *end) {
2571
+ status = lxb_css_syntax_tokenizer_next_chunk(tkz, &data, end);
2572
+ if (status != LXB_STATUS_OK) {
2573
+ return NULL;
2574
+ }
2575
+
2576
+ if (data >= *end) {
2577
+ return data;
2578
+ }
2579
+ }
2580
+
2581
+ /* U+000A LINE FEED */
2582
+ if (*data == 0x0A) {
2583
+ data++;
2584
+ }
2585
+
2586
+ return data;
2587
+ }
2588
+
2589
+ if (*data == 0x00) {
2590
+ status = lxb_css_syntax_string_append_rep(tkz);
2591
+ if (status != LXB_STATUS_OK) {
2592
+ return NULL;
2593
+ }
2594
+
2595
+ return data + 1;
2596
+ }
2597
+
2598
+ if (*data == 0x0A || *data == 0x0C) {
2599
+ return data + 1;
2600
+ }
2601
+
2602
+ return lxb_css_syntax_state_escaped(tkz, data, end);
2603
+ }