nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -0,0 +1,170 @@
1
+ %{
2
+ #include "tag_lookup.h"
3
+ #include "macros.h"
4
+ #include "ascii.h"
5
+ %}
6
+
7
+ %ignore-case
8
+ %struct-type
9
+ %omit-struct-type
10
+ %compare-lengths
11
+ %readonly-tables
12
+ %null-strings
13
+ %includes
14
+ %define lookup-function-name gumbo_tag_lookup
15
+ %define slot-name key
16
+ %define initializer-suffix ,GUMBO_TAG_UNKNOWN
17
+ TagHashSlot;
18
+
19
+ %%
20
+ html, GUMBO_TAG_HTML
21
+ head, GUMBO_TAG_HEAD
22
+ title, GUMBO_TAG_TITLE
23
+ base, GUMBO_TAG_BASE
24
+ link, GUMBO_TAG_LINK
25
+ meta, GUMBO_TAG_META
26
+ style, GUMBO_TAG_STYLE
27
+ script, GUMBO_TAG_SCRIPT
28
+ noscript, GUMBO_TAG_NOSCRIPT
29
+ template, GUMBO_TAG_TEMPLATE
30
+ body, GUMBO_TAG_BODY
31
+ article, GUMBO_TAG_ARTICLE
32
+ section, GUMBO_TAG_SECTION
33
+ nav, GUMBO_TAG_NAV
34
+ aside, GUMBO_TAG_ASIDE
35
+ h1, GUMBO_TAG_H1
36
+ h2, GUMBO_TAG_H2
37
+ h3, GUMBO_TAG_H3
38
+ h4, GUMBO_TAG_H4
39
+ h5, GUMBO_TAG_H5
40
+ h6, GUMBO_TAG_H6
41
+ hgroup, GUMBO_TAG_HGROUP
42
+ header, GUMBO_TAG_HEADER
43
+ footer, GUMBO_TAG_FOOTER
44
+ address, GUMBO_TAG_ADDRESS
45
+ p, GUMBO_TAG_P
46
+ hr, GUMBO_TAG_HR
47
+ pre, GUMBO_TAG_PRE
48
+ blockquote, GUMBO_TAG_BLOCKQUOTE
49
+ ol, GUMBO_TAG_OL
50
+ ul, GUMBO_TAG_UL
51
+ li, GUMBO_TAG_LI
52
+ dl, GUMBO_TAG_DL
53
+ dt, GUMBO_TAG_DT
54
+ dd, GUMBO_TAG_DD
55
+ figure, GUMBO_TAG_FIGURE
56
+ figcaption, GUMBO_TAG_FIGCAPTION
57
+ main, GUMBO_TAG_MAIN
58
+ div, GUMBO_TAG_DIV
59
+ a, GUMBO_TAG_A
60
+ em, GUMBO_TAG_EM
61
+ strong, GUMBO_TAG_STRONG
62
+ small, GUMBO_TAG_SMALL
63
+ s, GUMBO_TAG_S
64
+ cite, GUMBO_TAG_CITE
65
+ q, GUMBO_TAG_Q
66
+ dfn, GUMBO_TAG_DFN
67
+ abbr, GUMBO_TAG_ABBR
68
+ data, GUMBO_TAG_DATA
69
+ time, GUMBO_TAG_TIME
70
+ code, GUMBO_TAG_CODE
71
+ var, GUMBO_TAG_VAR
72
+ samp, GUMBO_TAG_SAMP
73
+ kbd, GUMBO_TAG_KBD
74
+ sub, GUMBO_TAG_SUB
75
+ sup, GUMBO_TAG_SUP
76
+ i, GUMBO_TAG_I
77
+ b, GUMBO_TAG_B
78
+ u, GUMBO_TAG_U
79
+ mark, GUMBO_TAG_MARK
80
+ ruby, GUMBO_TAG_RUBY
81
+ rt, GUMBO_TAG_RT
82
+ rp, GUMBO_TAG_RP
83
+ bdi, GUMBO_TAG_BDI
84
+ bdo, GUMBO_TAG_BDO
85
+ span, GUMBO_TAG_SPAN
86
+ br, GUMBO_TAG_BR
87
+ wbr, GUMBO_TAG_WBR
88
+ ins, GUMBO_TAG_INS
89
+ del, GUMBO_TAG_DEL
90
+ image, GUMBO_TAG_IMAGE
91
+ img, GUMBO_TAG_IMG
92
+ iframe, GUMBO_TAG_IFRAME
93
+ embed, GUMBO_TAG_EMBED
94
+ object, GUMBO_TAG_OBJECT
95
+ param, GUMBO_TAG_PARAM
96
+ video, GUMBO_TAG_VIDEO
97
+ audio, GUMBO_TAG_AUDIO
98
+ source, GUMBO_TAG_SOURCE
99
+ track, GUMBO_TAG_TRACK
100
+ canvas, GUMBO_TAG_CANVAS
101
+ map, GUMBO_TAG_MAP
102
+ area, GUMBO_TAG_AREA
103
+ math, GUMBO_TAG_MATH
104
+ mi, GUMBO_TAG_MI
105
+ mo, GUMBO_TAG_MO
106
+ mn, GUMBO_TAG_MN
107
+ ms, GUMBO_TAG_MS
108
+ mtext, GUMBO_TAG_MTEXT
109
+ mglyph, GUMBO_TAG_MGLYPH
110
+ malignmark, GUMBO_TAG_MALIGNMARK
111
+ annotation-xml, GUMBO_TAG_ANNOTATION_XML
112
+ svg, GUMBO_TAG_SVG
113
+ foreignobject, GUMBO_TAG_FOREIGNOBJECT
114
+ desc, GUMBO_TAG_DESC
115
+ table, GUMBO_TAG_TABLE
116
+ caption, GUMBO_TAG_CAPTION
117
+ colgroup, GUMBO_TAG_COLGROUP
118
+ col, GUMBO_TAG_COL
119
+ tbody, GUMBO_TAG_TBODY
120
+ thead, GUMBO_TAG_THEAD
121
+ tfoot, GUMBO_TAG_TFOOT
122
+ tr, GUMBO_TAG_TR
123
+ td, GUMBO_TAG_TD
124
+ th, GUMBO_TAG_TH
125
+ form, GUMBO_TAG_FORM
126
+ fieldset, GUMBO_TAG_FIELDSET
127
+ legend, GUMBO_TAG_LEGEND
128
+ label, GUMBO_TAG_LABEL
129
+ input, GUMBO_TAG_INPUT
130
+ button, GUMBO_TAG_BUTTON
131
+ select, GUMBO_TAG_SELECT
132
+ datalist, GUMBO_TAG_DATALIST
133
+ optgroup, GUMBO_TAG_OPTGROUP
134
+ option, GUMBO_TAG_OPTION
135
+ textarea, GUMBO_TAG_TEXTAREA
136
+ keygen, GUMBO_TAG_KEYGEN
137
+ output, GUMBO_TAG_OUTPUT
138
+ progress, GUMBO_TAG_PROGRESS
139
+ meter, GUMBO_TAG_METER
140
+ details, GUMBO_TAG_DETAILS
141
+ summary, GUMBO_TAG_SUMMARY
142
+ menu, GUMBO_TAG_MENU
143
+ menuitem, GUMBO_TAG_MENUITEM
144
+ applet, GUMBO_TAG_APPLET
145
+ acronym, GUMBO_TAG_ACRONYM
146
+ bgsound, GUMBO_TAG_BGSOUND
147
+ dir, GUMBO_TAG_DIR
148
+ frame, GUMBO_TAG_FRAME
149
+ frameset, GUMBO_TAG_FRAMESET
150
+ noframes, GUMBO_TAG_NOFRAMES
151
+ listing, GUMBO_TAG_LISTING
152
+ xmp, GUMBO_TAG_XMP
153
+ nextid, GUMBO_TAG_NEXTID
154
+ noembed, GUMBO_TAG_NOEMBED
155
+ plaintext, GUMBO_TAG_PLAINTEXT
156
+ rb, GUMBO_TAG_RB
157
+ strike, GUMBO_TAG_STRIKE
158
+ basefont, GUMBO_TAG_BASEFONT
159
+ big, GUMBO_TAG_BIG
160
+ blink, GUMBO_TAG_BLINK
161
+ center, GUMBO_TAG_CENTER
162
+ font, GUMBO_TAG_FONT
163
+ marquee, GUMBO_TAG_MARQUEE
164
+ multicol, GUMBO_TAG_MULTICOL
165
+ nobr, GUMBO_TAG_NOBR
166
+ spacer, GUMBO_TAG_SPACER
167
+ tt, GUMBO_TAG_TT
168
+ rtc, GUMBO_TAG_RTC
169
+ dialog, GUMBO_TAG_DIALOG
170
+ search, GUMBO_TAG_SEARCH
@@ -0,0 +1,13 @@
1
+ #ifndef GUMBO_TAG_LOOKUP_H_
2
+ #define GUMBO_TAG_LOOKUP_H_
3
+
4
+ #include "nokogiri_gumbo.h"
5
+
6
+ typedef struct {
7
+ const char *key;
8
+ const GumboTag tag;
9
+ } TagHashSlot;
10
+
11
+ const TagHashSlot *gumbo_tag_lookup(const char *str, size_t len);
12
+
13
+ #endif // GUMBO_TAG_LOOKUP_H_
@@ -0,0 +1,79 @@
1
+ /*
2
+ Copyright 2018 Stephen Checkoway
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ #include <assert.h>
18
+
19
+ #include "ascii.h"
20
+ #include "token_buffer.h"
21
+ #include "tokenizer.h"
22
+ #include "util.h"
23
+
24
+ struct GumboInternalCharacterToken {
25
+ GumboSourcePosition position;
26
+ GumboStringPiece original_text;
27
+ int c;
28
+ };
29
+
30
+ void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer) {
31
+ buffer->data = NULL;
32
+ buffer->length = 0;
33
+ buffer->capacity = 0;
34
+ }
35
+
36
+ void gumbo_character_token_buffer_append (
37
+ const GumboToken* token,
38
+ GumboCharacterTokenBuffer* buffer
39
+ ) {
40
+ assert(token->type == GUMBO_TOKEN_WHITESPACE
41
+ || token->type == GUMBO_TOKEN_CHARACTER);
42
+ if (buffer->length == buffer->capacity) {
43
+ if (buffer->capacity == 0)
44
+ buffer->capacity = 10;
45
+ else
46
+ buffer->capacity *= 2;
47
+ size_t bytes = sizeof(*buffer->data) * buffer->capacity;
48
+ buffer->data = gumbo_realloc(buffer->data, bytes);
49
+ }
50
+ size_t index = buffer->length++;
51
+ buffer->data[index].position = token->position;
52
+ buffer->data[index].original_text = token->original_text;
53
+ buffer->data[index].c = token->v.character;
54
+ }
55
+
56
+ void gumbo_character_token_buffer_get (
57
+ const GumboCharacterTokenBuffer* buffer,
58
+ size_t index,
59
+ struct GumboInternalToken* output
60
+ ) {
61
+ assert(index < buffer->length);
62
+ int c = buffer->data[index].c;
63
+ output->type = gumbo_ascii_isspace(c)?
64
+ GUMBO_TOKEN_WHITESPACE : GUMBO_TOKEN_CHARACTER;
65
+ output->position = buffer->data[index].position;
66
+ output->original_text = buffer->data[index].original_text;
67
+ output->v.character = c;
68
+ }
69
+
70
+ void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer) {
71
+ buffer->length = 0;
72
+ }
73
+
74
+ void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer) {
75
+ gumbo_free(buffer->data);
76
+ buffer->data = NULL;
77
+ buffer->length = 0;
78
+ buffer->capacity = 0;
79
+ }
@@ -0,0 +1,71 @@
1
+ /*
2
+ Copyright 2018 Stephen Checkoway
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ #ifndef GUMBO_TOKEN_BUFFER_H
18
+ #define GUMBO_TOKEN_BUFFER_H
19
+
20
+ #include <stdbool.h>
21
+ #include <stddef.h>
22
+
23
+ #include "nokogiri_gumbo.h"
24
+
25
+ #ifdef __cplusplus
26
+ extern "C" {
27
+ #endif
28
+
29
// Only pointers to these two structs appear in this header, so forward
// declarations are enough; no definition is needed here.
struct GumboInternalCharacterToken;
struct GumboInternalToken;

// A growable sequence of character (and whitespace) tokens.
typedef struct {
  // Start of the token storage; NULL while nothing has been allocated.
  struct GumboInternalCharacterToken* data;

  // Number of tokens currently stored.
  size_t length;

  // Number of tokens the current storage can hold.
  size_t capacity;
} GumboCharacterTokenBuffer;
44
+
45
+ // Initializes a new GumboCharacterTokenBuffer.
46
+ void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer);
47
+
48
+ // Appends a character (or whitespace) token.
49
+ void gumbo_character_token_buffer_append (
50
+ const struct GumboInternalToken* token,
51
+ GumboCharacterTokenBuffer* buffer
52
+ );
53
+
54
+ void gumbo_character_token_buffer_get (
55
+ const GumboCharacterTokenBuffer* buffer,
56
+ size_t index,
57
+ struct GumboInternalToken* output
58
+ );
59
+
60
+ // Reinitialize this string buffer. This clears it by setting length=0. It
61
+ // does not zero out the buffer itself.
62
+ void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer);
63
+
64
+ // Deallocates this GumboCharacterTokenBuffer.
65
+ void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer);
66
+
67
+ #ifdef __cplusplus
68
+ }
69
+ #endif
70
+
71
+ #endif // GUMBO_TOKEN_BUFFER_H
@@ -0,0 +1,17 @@
1
+ #ifndef GUMBO_TOKEN_TYPE_H_
2
+ #define GUMBO_TOKEN_TYPE_H_
3
+
4
// The kinds of token. Enumerators take the default values 0..8 in the order
// listed, so that order must not be changed.
typedef enum {
  GUMBO_TOKEN_DOCTYPE,     // 0
  GUMBO_TOKEN_START_TAG,   // 1
  GUMBO_TOKEN_END_TAG,     // 2
  GUMBO_TOKEN_COMMENT,     // 3
  GUMBO_TOKEN_WHITESPACE,  // 4
  GUMBO_TOKEN_CHARACTER,   // 5
  GUMBO_TOKEN_CDATA,       // 6
  GUMBO_TOKEN_NULL,        // 7
  GUMBO_TOKEN_EOF          // 8
} GumboTokenType;
16
+
17
+ #endif // GUMBO_TOKEN_TYPE_H_