nokogiri-backport 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1682 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +272 -0
  6. data/bin/nokogiri +118 -0
  7. data/dependencies.yml +74 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +178 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
  13. data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
  14. data/ext/java/nokogiri/NokogiriService.java +597 -0
  15. data/ext/java/nokogiri/XmlAttr.java +162 -0
  16. data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
  17. data/ext/java/nokogiri/XmlCdata.java +82 -0
  18. data/ext/java/nokogiri/XmlComment.java +97 -0
  19. data/ext/java/nokogiri/XmlDocument.java +633 -0
  20. data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
  21. data/ext/java/nokogiri/XmlDtd.java +481 -0
  22. data/ext/java/nokogiri/XmlElement.java +68 -0
  23. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  24. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  25. data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
  26. data/ext/java/nokogiri/XmlEntityReference.java +101 -0
  27. data/ext/java/nokogiri/XmlNamespace.java +199 -0
  28. data/ext/java/nokogiri/XmlNode.java +1684 -0
  29. data/ext/java/nokogiri/XmlNodeSet.java +434 -0
  30. data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
  31. data/ext/java/nokogiri/XmlReader.java +531 -0
  32. data/ext/java/nokogiri/XmlRelaxng.java +151 -0
  33. data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
  34. data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
  35. data/ext/java/nokogiri/XmlSchema.java +388 -0
  36. data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
  37. data/ext/java/nokogiri/XmlText.java +110 -0
  38. data/ext/java/nokogiri/XmlXpathContext.java +301 -0
  39. data/ext/java/nokogiri/XsltStylesheet.java +347 -0
  40. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  42. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  43. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  44. data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
  45. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
  46. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
  47. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  48. data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
  50. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
  52. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  53. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
  56. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
  57. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
  58. data/ext/java/nokogiri/internals/ParserContext.java +259 -0
  59. data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
  60. data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
  61. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
  62. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
  63. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  64. data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
  65. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  66. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  67. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  69. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  79. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  82. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  83. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  84. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  85. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  86. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  87. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
  88. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  89. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  90. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  91. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  92. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
  94. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
  95. data/ext/nokogiri/depend +477 -0
  96. data/ext/nokogiri/extconf.rb +836 -0
  97. data/ext/nokogiri/html_document.c +171 -0
  98. data/ext/nokogiri/html_document.h +10 -0
  99. data/ext/nokogiri/html_element_description.c +279 -0
  100. data/ext/nokogiri/html_element_description.h +10 -0
  101. data/ext/nokogiri/html_entity_lookup.c +32 -0
  102. data/ext/nokogiri/html_entity_lookup.h +8 -0
  103. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  104. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  105. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  106. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  107. data/ext/nokogiri/nokogiri.c +135 -0
  108. data/ext/nokogiri/nokogiri.h +130 -0
  109. data/ext/nokogiri/xml_attr.c +103 -0
  110. data/ext/nokogiri/xml_attr.h +9 -0
  111. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  112. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  113. data/ext/nokogiri/xml_cdata.c +62 -0
  114. data/ext/nokogiri/xml_cdata.h +9 -0
  115. data/ext/nokogiri/xml_comment.c +69 -0
  116. data/ext/nokogiri/xml_comment.h +9 -0
  117. data/ext/nokogiri/xml_document.c +622 -0
  118. data/ext/nokogiri/xml_document.h +23 -0
  119. data/ext/nokogiri/xml_document_fragment.c +48 -0
  120. data/ext/nokogiri/xml_document_fragment.h +10 -0
  121. data/ext/nokogiri/xml_dtd.c +202 -0
  122. data/ext/nokogiri/xml_dtd.h +10 -0
  123. data/ext/nokogiri/xml_element_content.c +123 -0
  124. data/ext/nokogiri/xml_element_content.h +10 -0
  125. data/ext/nokogiri/xml_element_decl.c +69 -0
  126. data/ext/nokogiri/xml_element_decl.h +9 -0
  127. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  128. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  129. data/ext/nokogiri/xml_entity_decl.c +110 -0
  130. data/ext/nokogiri/xml_entity_decl.h +10 -0
  131. data/ext/nokogiri/xml_entity_reference.c +52 -0
  132. data/ext/nokogiri/xml_entity_reference.h +9 -0
  133. data/ext/nokogiri/xml_io.c +63 -0
  134. data/ext/nokogiri/xml_io.h +11 -0
  135. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  136. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  137. data/ext/nokogiri/xml_namespace.c +111 -0
  138. data/ext/nokogiri/xml_namespace.h +14 -0
  139. data/ext/nokogiri/xml_node.c +1773 -0
  140. data/ext/nokogiri/xml_node.h +13 -0
  141. data/ext/nokogiri/xml_node_set.c +486 -0
  142. data/ext/nokogiri/xml_node_set.h +12 -0
  143. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  144. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  145. data/ext/nokogiri/xml_reader.c +657 -0
  146. data/ext/nokogiri/xml_reader.h +10 -0
  147. data/ext/nokogiri/xml_relax_ng.c +179 -0
  148. data/ext/nokogiri/xml_relax_ng.h +9 -0
  149. data/ext/nokogiri/xml_sax_parser.c +305 -0
  150. data/ext/nokogiri/xml_sax_parser.h +39 -0
  151. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  152. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  153. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  154. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  155. data/ext/nokogiri/xml_schema.c +276 -0
  156. data/ext/nokogiri/xml_schema.h +9 -0
  157. data/ext/nokogiri/xml_syntax_error.c +64 -0
  158. data/ext/nokogiri/xml_syntax_error.h +13 -0
  159. data/ext/nokogiri/xml_text.c +52 -0
  160. data/ext/nokogiri/xml_text.h +9 -0
  161. data/ext/nokogiri/xml_xpath_context.c +374 -0
  162. data/ext/nokogiri/xml_xpath_context.h +10 -0
  163. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  164. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  165. data/lib/isorelax.jar +0 -0
  166. data/lib/jing.jar +0 -0
  167. data/lib/nekodtd.jar +0 -0
  168. data/lib/nekohtml.jar +0 -0
  169. data/lib/nokogiri/css/node.rb +53 -0
  170. data/lib/nokogiri/css/parser.rb +751 -0
  171. data/lib/nokogiri/css/parser.y +272 -0
  172. data/lib/nokogiri/css/parser_extras.rb +94 -0
  173. data/lib/nokogiri/css/syntax_error.rb +8 -0
  174. data/lib/nokogiri/css/tokenizer.rb +154 -0
  175. data/lib/nokogiri/css/tokenizer.rex +55 -0
  176. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  177. data/lib/nokogiri/css.rb +28 -0
  178. data/lib/nokogiri/decorators/slop.rb +43 -0
  179. data/lib/nokogiri/html/builder.rb +36 -0
  180. data/lib/nokogiri/html/document.rb +322 -0
  181. data/lib/nokogiri/html/document_fragment.rb +50 -0
  182. data/lib/nokogiri/html/element_description.rb +24 -0
  183. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  184. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  185. data/lib/nokogiri/html/sax/parser.rb +63 -0
  186. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  187. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  188. data/lib/nokogiri/html.rb +38 -0
  189. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  190. data/lib/nokogiri/syntax_error.rb +5 -0
  191. data/lib/nokogiri/version/constant.rb +5 -0
  192. data/lib/nokogiri/version/info.rb +182 -0
  193. data/lib/nokogiri/version.rb +3 -0
  194. data/lib/nokogiri/xml/attr.rb +15 -0
  195. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  196. data/lib/nokogiri/xml/builder.rb +447 -0
  197. data/lib/nokogiri/xml/cdata.rb +12 -0
  198. data/lib/nokogiri/xml/character_data.rb +8 -0
  199. data/lib/nokogiri/xml/document.rb +290 -0
  200. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  201. data/lib/nokogiri/xml/dtd.rb +33 -0
  202. data/lib/nokogiri/xml/element_content.rb +37 -0
  203. data/lib/nokogiri/xml/element_decl.rb +14 -0
  204. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  205. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  206. data/lib/nokogiri/xml/namespace.rb +14 -0
  207. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  208. data/lib/nokogiri/xml/node.rb +1240 -0
  209. data/lib/nokogiri/xml/node_set.rb +372 -0
  210. data/lib/nokogiri/xml/notation.rb +7 -0
  211. data/lib/nokogiri/xml/parse_options.rb +127 -0
  212. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  213. data/lib/nokogiri/xml/pp/node.rb +57 -0
  214. data/lib/nokogiri/xml/pp.rb +3 -0
  215. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  216. data/lib/nokogiri/xml/reader.rb +116 -0
  217. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  218. data/lib/nokogiri/xml/sax/document.rb +172 -0
  219. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  220. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  221. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  222. data/lib/nokogiri/xml/sax.rb +5 -0
  223. data/lib/nokogiri/xml/schema.rb +72 -0
  224. data/lib/nokogiri/xml/searchable.rb +239 -0
  225. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  226. data/lib/nokogiri/xml/text.rb +10 -0
  227. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  228. data/lib/nokogiri/xml/xpath.rb +11 -0
  229. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  230. data/lib/nokogiri/xml.rb +76 -0
  231. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  232. data/lib/nokogiri/xslt.rb +57 -0
  233. data/lib/nokogiri.rb +144 -0
  234. data/lib/serializer.jar +0 -0
  235. data/lib/xalan.jar +0 -0
  236. data/lib/xercesImpl.jar +0 -0
  237. data/lib/xml-apis.jar +0 -0
  238. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  239. metadata +531 -0
@@ -0,0 +1,272 @@
1
class Nokogiri::CSS::Parser

# Racc grammar for CSS selectors. Every action builds Node objects (or plain
# arrays and symbols) that are later turned into XPath by the visitor.
# Inside an action, val holds the values of the matched right-hand-side
# symbols in order, and result is the value produced for the rule.

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS

rule
  # A comma-separated list of selectors; always yields a flat array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | prefixless_combinator_selector { result = val.flatten }
    | optional_S simple_selector_1toN { result = [val.last].flatten }
    ;
  # Maps a combinator token to the node type used for the combined selector.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :FOLLOWING_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | function
    | function pseudo {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN {
        # No element name was given, so the conditions apply to any element.
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # A selector that starts with a combinator; the left operand is nil.
  prefixless_combinator_selector
    : combinator simple_selector_1toN {
        result = Node.new(val.first, [nil, val.last])
      }
    ;
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector S simple_selector_1toN {
        result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
    ;
  element_name
    : namespaced_ident
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  namespaced_ident
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # An unprefixed element name picks up the default namespace when one is registered.
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  attrib
    : LSQUARE attrib_name attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # non-standard, from hpricot
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  attrib_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # Default namespace is not applied to attributes.
        # So we don't add prefix "xmlns:" as in namespaced_ident.
        result = Node.new(:ELEMENT_NAME, [val.first])
      }
    ;
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION nth RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | HAS selector RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT # even, odd
      {
        case val[0]
        when 'even'
          result = Node.new(:NTH, ['2','n','+','0'])
        when 'odd'
          result = Node.new(:NTH, ['2','n','+','1'])
        when 'n'
          result = Node.new(:NTH, ['1','n','+','0'])
        else
          # non-standard to support custom functions:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # The an+b argument forms; each action normalizes to the four NTH parts a, n, sign, b.
  nth
    : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:NTH, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER { # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:NTH, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:NTH, val)
        else
          # The IDENT is the first symbol of this alternative, so report val[0];
          # val[1] is the PLUS token and would name the wrong input.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT { # 5n, -5n, 10n-1
        n = val[1]
        if n[0, 2] == 'n-'
          val[1] = 'n'
          val << "-"
          # b is contained in n as n is the string "n-b"
          val << n[2, n.size]
          result = Node.new(:NTH, val)
        elsif n == 'n'
          val << "+"
          val << "0"
          result = Node.new(:NTH, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more id, class, attribute, pseudo or negation conditions, chained
  # right-recursively into nested COMBINATOR nodes.
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | negation hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    | negation
    ;
  attribute_id
    : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
    ;
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
    | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
    | eql_incl_dash NUMBER { result = [val.first, val[1]] }
    |
    ;
  eql_incl_dash
    : EQUAL { result = :equal }
    | PREFIXMATCH { result = :prefix_match }
    | SUFFIXMATCH { result = :suffix_match }
    | SUBSTRINGMATCH { result = :substring_match }
    | NOT_EQUAL { result = :not_equal }
    | INCLUDES { result = :includes }
    | DASHMATCH { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : element_name
    | element_name hcap_1toN
    | hcap_1toN
    ;
  optional_S
    : S
    |
    ;
end
253
+
254
+ ---- header
255
+
256
+ require 'nokogiri/css/parser_extras'
257
+
258
+ ---- inner
259
+
260
# Resolve CSS backslash escapes in an identifier.
#
# A backslash followed by a non-hex character yields that character; a
# backslash followed by one to six hex digits (plus one optional trailing
# whitespace character) yields the character with that code point.
def unescape_css_identifier(identifier)
  identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do
    literal, codepoint = Regexp.last_match.captures
    literal || [codepoint.hex].pack('U')
  end
end
263
+
264
# Resolve CSS backslash escapes in a string value.
#
# Works like identifier unescaping, except that a backslash immediately
# followed by a newline is removed entirely.
def unescape_css_string(str)
  str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do
    literal, codepoint = Regexp.last_match.captures
    next '' if literal == "\n"

    literal || [codepoint.hex].pack('U')
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+ require "thread"
3
+
4
module Nokogiri
  module CSS
    # Hand-written half of the CSS parser: a process-wide, mutex-guarded cache
    # of computed XPath results with a per-thread on/off switch, plus the glue
    # between the tokenizer and the Racc-driven parse.
    class Parser < Racc::Parser
      # Thread-local key; a truthy value stored under it means the cache is OFF.
      CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off

      @cache = {}
      @mutex = Mutex.new

      class << self
        # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
        def cache_on?
          !Thread.current[CACHE_SWITCH_NAME]
        end

        # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
        def set_cache(value)
          Thread.current[CACHE_SWITCH_NAME] = !value
        end

        # Get the css selector in +string+ from the cache.
        # Returns nil when the cache is off for the current thread or on a miss.
        def [](string)
          return unless cache_on?
          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # When the cache is off for the current thread nothing is stored.
        def []=(string, value)
          return value unless cache_on?
          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache. With +create_new_object+ truthy the backing Hash is
        # replaced by a fresh one instead of being emptied in place.
        def clear_cache(create_new_object = false)
          @mutex.synchronize do
            if create_new_object
              @cache = {}
            else
              @cache.clear
            end
          end
        end

        # Execute +block+ without cache, restoring the previous per-thread
        # setting afterwards even if the block raises.
        def without_cache(&block)
          original_cache_setting = cache_on?
          set_cache false
          block.call
        ensure
          set_cache original_cache_setting
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize(namespaces = {})
        @tokenizer = Tokenizer.new
        @namespaces = namespaces
        super()
      end

      # Tokenize and parse +string+, returning whatever the grammar produces.
      def parse(string)
        @tokenizer.scan_setup string
        do_parse
      end

      # Called by the Racc runtime to pull the next token.
      def next_token
        @tokenizer.next_token
      end

      # Get the xpath for +string+ using +options+
      #
      # Recognized options: +:ns+, +:prefix+ (defaults to "//") and +:visitor+
      # (defaults to a new XPathVisitor). Returns an Array with one XPath
      # expression per parsed selector.
      def xpath_for(string, options = {})
        custom_visitor = options[:visitor]
        key = "#{string}#{options[:ns]}#{options[:prefix]}"

        # The cache key does not encode the visitor, and a caller-supplied
        # visitor may generate different XPath for the same selector. Such
        # requests therefore neither read from nor write to the shared cache;
        # otherwise they could receive, or leave behind, XPath produced by a
        # different visitor.
        unless custom_visitor
          v = self.class[key]
          return v if v
        end

        args = [
          options[:prefix] || "//",
          custom_visitor || XPathVisitor.new,
        ]
        xpaths = parse(string).map { |ast|
          ast.to_xpath(*args)
        }
        return xpaths if custom_visitor

        self.class[key] = xpaths
      end

      # On CSS parser error, raise an exception
      def on_error(error_token_id, error_value, value_stack)
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/syntax_error'
3
module Nokogiri
  module CSS
    # Error type for CSS selector problems. It adds no behaviour of its own;
    # it only specializes ::Nokogiri::SyntaxError so callers can rescue CSS
    # failures specifically.
    class SyntaxError < ::Nokogiri::SyntaxError
    end
  end
end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+ #--
3
+ # DO NOT MODIFY!!!!
4
+ # This file is automatically generated by rex 1.0.7
5
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
6
+ #++
7
+
8
module Nokogiri
  module CSS
    # StringScanner-based lexer for CSS selectors. Each token is a two-element
    # array of [type, matched_text].
    class Tokenizer # :nodoc:
      require 'strscan'

      # Raised when no pattern matches or the lexer is in an unknown state.
      class ScanError < StandardError ; end

      attr_reader :lineno
      attr_reader :filename
      attr_accessor :state

      # Prepare to tokenize +str+: new scanner, line counter at 1, default state.
      def scan_setup(str)
        @ss = StringScanner.new(str)
        @lineno = 1
        @state = nil
      end

      # Runs the given block and returns its value; every rule action goes through here.
      def action
        yield
      end

      # NOTE(review): do_parse is not defined anywhere in this class as shown;
      # confirm whether anything calls scan_str / scan before relying on it.
      def scan_str(str)
        scan_setup(str)
        do_parse
      end
      alias :scan :scan_str

      # Remember +filename+ and load the whole file as the scanner input.
      def load_file( filename )
        @filename = filename
        File.open(filename, "r") do |f|
          scan_setup(f.read)
        end
      end

      # NOTE(review): same do_parse caveat as scan_str.
      def scan_file( filename )
        load_file(filename)
        do_parse
      end


      # Return the next token, or nil once the scanner has reached the end of
      # the input.
      def next_token
        return if @ss.eos?

        # skips empty actions
        until token = _next_token or @ss.eos?; end
        token
      end

      # Match exactly one rule at the current scan position and return the
      # value of its action. The +when+ branches are tried top to bottom, so
      # the first matching pattern wins.
      #
      # Raises ScanError when nothing matches or when @state is not nil.
      def _next_token
        text = @ss.peek(1)
        # The line counter advances only when the next unread character is a newline.
        @lineno += 1 if text == "\n"
        token = case @state
        when nil
          case
          when (text = @ss.scan(/has\([\s]*/))
            action { [:HAS, text] }

          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
            action { [:FUNCTION, text] }

          when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
            action { [:IDENT, text] }

          when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
            action { [:HASH, text] }

          when (text = @ss.scan(/[\s]*~=[\s]*/))
            action { [:INCLUDES, text] }

          when (text = @ss.scan(/[\s]*\|=[\s]*/))
            action { [:DASHMATCH, text] }

          when (text = @ss.scan(/[\s]*\^=[\s]*/))
            action { [:PREFIXMATCH, text] }

          when (text = @ss.scan(/[\s]*\$=[\s]*/))
            action { [:SUFFIXMATCH, text] }

          when (text = @ss.scan(/[\s]*\*=[\s]*/))
            action { [:SUBSTRINGMATCH, text] }

          when (text = @ss.scan(/[\s]*!=[\s]*/))
            action { [:NOT_EQUAL, text] }

          when (text = @ss.scan(/[\s]*=[\s]*/))
            action { [:EQUAL, text] }

          when (text = @ss.scan(/[\s]*\)/))
            action { [:RPAREN, text] }

          when (text = @ss.scan(/\[[\s]*/))
            action { [:LSQUARE, text] }

          when (text = @ss.scan(/[\s]*\]/))
            action { [:RSQUARE, text] }

          when (text = @ss.scan(/[\s]*\+[\s]*/))
            action { [:PLUS, text] }

          when (text = @ss.scan(/[\s]*>[\s]*/))
            action { [:GREATER, text] }

          when (text = @ss.scan(/[\s]*,[\s]*/))
            action { [:COMMA, text] }

          when (text = @ss.scan(/[\s]*~[\s]*/))
            action { [:TILDE, text] }

          when (text = @ss.scan(/\:not\([\s]*/))
            action { [:NOT, text] }

          when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
            action { [:NUMBER, text] }

          when (text = @ss.scan(/[\s]*\/\/[\s]*/))
            action { [:DOUBLESLASH, text] }

          when (text = @ss.scan(/[\s]*\/[\s]*/))
            action { [:SLASH, text] }

          when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
            action {[:UNICODE_RANGE, text] }

          when (text = @ss.scan(/[\s]+/))
            action { [:S, text] }

          when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
            action { [:STRING, text] }

          # Catch-all: a single character becomes a token whose type is the character itself.
          when (text = @ss.scan(/./))
            action { [text, text] }


          else
            text = @ss.string[@ss.pos .. -1]
            raise ScanError, "can not match: '" + text + "'"
          end # if

        else
          raise ScanError, "undefined state: '" + state.to_s + "'"
        end # case state
        token
      end # def _next_token

    end # class
  end
end
@@ -0,0 +1,55 @@
1
module Nokogiri
module CSS
class Tokenizer # :nodoc:

macro
  nl \n|\r\n|\r|\f
  w [\s]*
  nonascii [^\0-\177]
  num -?([0-9]+|[0-9]*\.[0-9]+)
  unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?

  escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
  nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
  nmstart [_A-Za-z]|{nonascii}|{escape}
  ident [-@]?({nmstart})({nmchar})*
  name ({nmchar})+
  string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
  string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
  string {string1}|{string2}

rule

# [:state] pattern [actions]
# The generated tokenizer tries these patterns top to bottom and takes the
# first one that matches, so order matters: the has-function rule must stay
# above the generic function rule, and the function rule above the plain
# identifier rule.
# The last rule is a catch-all that turns any single remaining character
# into a token whose type is the character itself.

  has\({w} { [:HAS, text] }
  {ident}\({w} { [:FUNCTION, text] }
  {ident} { [:IDENT, text] }
  \#{name} { [:HASH, text] }
  {w}~={w} { [:INCLUDES, text] }
  {w}\|={w} { [:DASHMATCH, text] }
  {w}\^={w} { [:PREFIXMATCH, text] }
  {w}\$={w} { [:SUFFIXMATCH, text] }
  {w}\*={w} { [:SUBSTRINGMATCH, text] }
  {w}!={w} { [:NOT_EQUAL, text] }
  {w}={w} { [:EQUAL, text] }
  {w}\) { [:RPAREN, text] }
  \[{w} { [:LSQUARE, text] }
  {w}\] { [:RSQUARE, text] }
  {w}\+{w} { [:PLUS, text] }
  {w}>{w} { [:GREATER, text] }
  {w},{w} { [:COMMA, text] }
  {w}~{w} { [:TILDE, text] }
  \:not\({w} { [:NOT, text] }
  {num} { [:NUMBER, text] }
  {w}\/\/{w} { [:DOUBLESLASH, text] }
  {w}\/{w} { [:SLASH, text] }

  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }

  [\s]+ { [:S, text] }
  {string} { [:STRING, text] }
  . { [text, text] }
end
end
end