nokogiri 1.3.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by rex 1.0.1
|
4
|
+
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
|
+
#
|
6
|
+
|
7
|
+
module Nokogiri
module CSS
# Tokenizer for CSS selector strings (generated by rex; regenerate from the
# .rex definition rather than editing this code by hand).
#
# scan_evaluate turns an input string into a queue of [token_type, text]
# pairs held in @rex_tokens; next_token hands them out one at a time.
# do_parse is not defined in this class -- presumably it is inherited from
# GeneratedParser and pulls tokens through next_token; confirm against the
# parser runtime.
class GeneratedTokenizer < GeneratedParser
  require 'strscan'

  # Raised when the scanner cannot match the remaining input, or when it is
  # in an unknown lexer state.
  class ScanError < StandardError ; end

  # Line counter maintained by scan_evaluate (starts at 1).
  attr_reader :lineno
  # Name of the file most recently passed to load_file (nil otherwise).
  attr_reader :filename

  # Hook invoked at the start of every scan_evaluate; a no-op here.
  def scan_setup ; end

  # Runs the given block and returns its value. Every lexer rule below wraps
  # its token construction in a call to this method.
  def action &block
    yield
  end

  # Tokenizes +str+ and then runs the parser over the resulting tokens.
  # Returns whatever do_parse returns.
  def scan_str( str )
    scan_evaluate  str
    do_parse
  end

  # Remembers +filename+, reads the whole file and tokenizes its contents.
  #
  # NOTE(review): the bare `open` here is Kernel#open. In many Ruby versions
  # Kernel#open treats a name beginning with "|" as a command to run, so
  # File.open would be the safer call if +filename+ can ever come from
  # untrusted input. Because this file is generated, such a change belongs
  # in the generator/template -- confirm before editing.
  def load_file( filename )
    @filename = filename
    open(filename, "r") do |f|
      scan_evaluate  f.read
    end
  end

  # Tokenizes the contents of +filename+ and then runs the parser.
  def scan_file( filename )
    load_file  filename
    do_parse
  end

  # Removes and returns the next [token_type, text] pair from the queue
  # (Array#shift returns nil once the queue is empty).
  def next_token
    @rex_tokens.shift
  end

  # Tokenizes +str+ into @rex_tokens.
  #
  # The token queue and line counter are reset on every call. The `when`
  # clauses are tried in order and the first regexp that matches at the
  # current scan position wins, so their order is significant: FUNCTION is
  # tried before IDENT, "~=" before "~", "//" before "/", and the
  # single-character catch-all comes last.
  #
  # Raises ScanError if nothing matches at the current position.
  def scan_evaluate( str )
    scan_setup
    @rex_tokens = []
    @lineno  =  1
    ss = StringScanner.new(str)
    # `state` is never reassigned in this method, so only the `when nil`
    # branch of `case state` below is ever taken.
    state = nil
    until ss.eos?
      # Only the single character at the current scan position is inspected
      # here, so a newline consumed in the middle of a longer token match
      # does not bump @lineno.
      text = ss.peek(1)
      @lineno  +=  1  if text == "\n"
      case state
      when nil
        case
        # identifier immediately followed by "(" and optional whitespace
        when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
           @rex_tokens.push action { [:FUNCTION, text] }

        # plain identifier (same pattern as above without the trailing "(")
        when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
           @rex_tokens.push action { [:IDENT, text] }

        # "#" followed by one or more name characters
        when (text = ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
           @rex_tokens.push action { [:HASH, text] }

        # Attribute-match operators; surrounding whitespace is part of the
        # token text. The two-character operators come before plain "=".
        when (text = ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
           @rex_tokens.push action { [:INCLUDES, text] }

        when (text = ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
           @rex_tokens.push action { [:DASHMATCH, text] }

        when (text = ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
           @rex_tokens.push action { [:PREFIXMATCH, text] }

        when (text = ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
           @rex_tokens.push action { [:SUFFIXMATCH, text] }

        when (text = ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
           @rex_tokens.push action { [:SUBSTRINGMATCH, text] }

        when (text = ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
           @rex_tokens.push action { [:NOT_EQUAL, text] }

        when (text = ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
           @rex_tokens.push action { [:EQUAL, text] }

        # Closing brackets swallow only the whitespace before them.
        when (text = ss.scan(/[\s\r\n\f]*\)/))
           @rex_tokens.push action { [:RPAREN, text] }

        when (text = ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
           @rex_tokens.push action { [:LSQUARE, text] }

        when (text = ss.scan(/[\s\r\n\f]*\]/))
           @rex_tokens.push action { [:RSQUARE, text] }

        # Combinator characters, with optional whitespace on both sides.
        when (text = ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
           @rex_tokens.push action { [:PLUS, text] }

        when (text = ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
           @rex_tokens.push action { [:GREATER, text] }

        when (text = ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
           @rex_tokens.push action { [:COMMA, text] }

        when (text = ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
           @rex_tokens.push action { [:TILDE, text] }

        when (text = ss.scan(/\:not\([\s\r\n\f]*/))
           @rex_tokens.push action { [:NOT, text] }

        # optionally negative integer or decimal
        when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
           @rex_tokens.push action { [:NUMBER, text] }

        # "//" must be tried before "/" or it would lex as two SLASH tokens
        when (text = ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
           @rex_tokens.push action { [:DOUBLESLASH, text] }

        when (text = ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
           @rex_tokens.push action { [:SLASH, text] }

        when (text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
           @rex_tokens.push action {[:UNICODE_RANGE, text] }

        # whitespace run not already absorbed by one of the rules above
        when (text = ss.scan(/[\s\t\r\n\f]+/))
           @rex_tokens.push action { [:S, text] }

        # double- or single-quoted string, including escape sequences
        when (text = ss.scan(/"([^\n\r\f"]|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
           @rex_tokens.push action { [:STRING, text] }

        # Catch-all: any other single character becomes a token whose type
        # is the character itself.
        when (text = ss.scan(/./))
           @rex_tokens.push action { [text, text] }

        else
          # Nothing matched: report the unscanned remainder of the input.
          text = ss.string[ss.pos .. -1]
          raise  ScanError, "can not match: '" + text + "'"
        end  # if

      else
        raise  ScanError, "undefined state: '" + state.to_s + "'"
      end  # case state
    end  # until ss
  end  # def scan_evaluate

end # class
end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Nokogiri
  module CSS
    ###
    # A node in the syntax tree built for a CSS selector. Each node has a
    # +type+ and a +value+; the methods below treat +value+ as an Array
    # whose members are child Nodes and/or plain (non-Node) values.
    class Node
      # The type of this node (readable and writable)
      attr_accessor :type
      # The value of this node (readable and writable)
      attr_accessor :value

      # Create a new Node with +type+ and +value+
      def initialize type, value
        @type = type
        @value = value
      end

      # Accept +visitor+: calls visit_<downcased type> on +visitor+, passing
      # this node, and returns the result.
      def accept visitor
        visitor.send(:"visit_#{type.to_s.downcase}", self)
      end

      ###
      # Convert this CSS node to xpath with +prefix+ using +visitor+.
      # The tree is rewritten in place by preprocess! first.
      def to_xpath prefix = '//', visitor = XPathVisitor.new
        self.preprocess!
        prefix + visitor.accept(self)
      end

      # Preprocess this node tree. Rewrites, in place, conditional selectors
      # of the form <tag>:<positional pseudo class> so that the element name
      # becomes '*' and the original tag name moves into a trailing
      # 'self(' function. Returns self.
      def preprocess!
        ### Deal with nth-child, nth-last-child
        find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME],
            [:PSEUDO_CLASS,
              [:FUNCTION]
            ]
          ]
        ).each do |match|
          function = match.value[1].value[0]
          if function.value[0] =~ /^nth-(last-)?child/
            wrap_with_self_test(match, function)
          end
        end

        ### Deal with first-child, last-child, only-child
        # This search must run after the loop above: it only sees
        # selectors whose pseudo class has no child Nodes.
        find_by_type(
          [:CONDITIONAL_SELECTOR,
            [:ELEMENT_NAME], [:PSEUDO_CLASS]
          ]
        ).each do |match|
          pseudo = match.value[1].value.first
          if ['first-child', 'last-child'].include?(pseudo)
            # 'first-child' -> 'first', 'last-child' -> 'last'
            function_name = pseudo.gsub(/-\w*$/, '')
          elsif 'only-child' == pseudo
            function_name = pseudo
          else
            next
          end
          wrap_with_self_test(match, Node.new(:FUNCTION, ["#{function_name}("]))
        end

        self
      end

      # Find a node by type using +types+. Returns every node in this
      # subtree (including self) whose to_type equals +types+, parents
      # before their descendants.
      def find_by_type types
        matches = []
        matches << self if to_type == types
        @value.each do |v|
          matches += v.find_by_type(types) if v.respond_to?(:find_by_type)
        end
        matches
      end

      # Convert to_type: the nested Array of node types for this subtree.
      # Members of +value+ that do not respond to to_type are left out.
      def to_type
        [@type] + @value.map { |n|
          n.to_type if n.respond_to?(:to_type)
        }.compact
      end

      # Convert to array: this node's type followed by one entry per value
      # (the value's own to_a if it has one, otherwise the value wrapped in
      # a one-element Array).
      def to_a
        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
      end

      private

      # Shared rewrite used by preprocess!: +match+ is a conditional
      # selector whose first value is the element name node. The tag name is
      # replaced by '*' and the second value becomes a COMBINATOR of
      # +leading+ followed by a 'self(' function carrying the original tag.
      def wrap_with_self_test match, leading
        tag_name = match.value[0].value.first
        match.value[0].value = ['*']
        match.value[1] = Node.new(:COMBINATOR, [
          leading,
          Node.new(:FUNCTION, ['self(', tag_name])
        ])
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
module Nokogiri
  module CSS
    ###
    # CSS selector parser with a class-level cache of generated XPath
    # expressions. The cache, its on/off flag and the mutex guarding it are
    # instance variables of this class object itself.
    class Parser < GeneratedTokenizer
      @cache_on = true
      @cache    = {}
      @mutex    = Mutex.new

      class << self
        # Turn on CSS parse caching
        attr_accessor :cache_on
        alias :cache_on? :cache_on
        alias :set_cache :cache_on=

        # Get the css selector in +string+ from the cache.
        # Returns nil when caching is off or nothing is stored for +string+.
        def [] string
          return unless @cache_on
          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # When caching is off nothing is stored; +value+ is returned.
        def []= string, value
          return value unless @cache_on
          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache
        def clear_cache
          @mutex.synchronize { @cache = {} }
        end

        # Execute +block+ without cache and return the block's value.
        # The previous cache setting is restored in an +ensure+ clause so
        # that an exception raised by +block+ (for example a selector
        # syntax error) cannot leave caching switched off.
        def without_cache &block
          tmp = @cache_on
          @cache_on = false
          block.call
        ensure
          @cache_on = tmp
        end

        ###
        # Parse this CSS selector in +selector+.  Returns an AST.
        # Deprecated: prints a one-time warning to $stderr.
        def parse selector
          @warned ||= false
          unless @warned
            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse()')
            @warned = true
          end
          new.parse selector
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize namespaces = {}
        @namespaces = namespaces
        super()
      end
      alias :parse :scan_str

      # Get the xpath for +string+ using +options+.
      #
      # Recognised options: +:ns+, +:prefix+ (defaults to '//') and
      # +:visitor+ (defaults to a new XPathVisitor). Results are cached on
      # the class. The cache key includes the prefix and, when a visitor is
      # supplied, the visitor's class, so that a result generated for one
      # prefix or visitor is never returned for another. Keys for calls
      # without +:prefix+ and +:visitor+ are the same as before.
      def xpath_for string, options={}
        key = "#{string}#{options[:ns]}#{options[:prefix]}"
        key << options[:visitor].class.to_s if options[:visitor]
        v = self.class[key]
        return v if v

        args = [
          options[:prefix] || '//',
          options[:visitor] || XPathVisitor.new
        ]
        self.class[key] = parse(string).map { |ast|
          ast.to_xpath(*args)
        }
      end

      # On CSS parser error, raise an exception naming the unexpected value
      # and the last non-nil entry of the value stack.
      def on_error error_token_id, error_value, value_stack
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# Racc grammar for CSS selectors. Each action builds Node objects (or plain
# symbols / arrays) from the matched values in +val+ and stores the outcome
# in +result+.
class Nokogiri::CSS::GeneratedParser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE

rule
  # Comma-separated list of selectors, flattened into one array
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | simple_selector_1toN { result = val.flatten }
    ;
  # Maps a combinator token to the node type used for the combined selector
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :PRECEDING_SELECTOR }
    | S { result = :DESCENDANT_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  # One selector step: an element name and/or conditions on it.
  # When no element name is given, '*' is used.
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | element_name hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            val.first,
            Node.new(:COMBINATOR, [val[1], val.last])
          ]
        )
      }
    | element_name negation {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN negation {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [
            Node.new(:ELEMENT_NAME, ['*']),
            Node.new(:COMBINATOR, val)
          ]
        )
      }
    | hcap_1toN {
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # Chain of simple selectors joined by combinators (right-nested)
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
    ;
  # Element name, optionally namespace-qualified ("ns|name" becomes
  # "ns:name"). A bare name is qualified with "xmlns:" when the parser was
  # given an 'xmlns' namespace.
  element_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  # Namespace prefix; may be empty
  namespace
    : IDENT { result = val[0] }
    |
    ;
  # Bracketed attribute condition: [name], [name op value], [function ...]
  attrib
    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  # Function call: the stripped FUNCTION/NOT token text followed by the
  # flattened arguments
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION an_plus_b RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  # Comma-separated function arguments
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT                             # even, odd
      {
        if val[0] == 'even'
          val = ["2","n","+","0"]
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == 'odd'
          val = ["2","n","+","1"]
          result = Node.new(:AN_PLUS_B, val)
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # "an+b" expressions; every accepted form is normalised to the four
  # values [a, 'n', '+', b]
  an_plus_b
    : NUMBER IDENT PLUS NUMBER          # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {               # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:AN_PLUS_B, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:AN_PLUS_B, val)
        else
          # In this alternative the IDENT is val[0]; val[1] is the PLUS token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT                      # 5n, -5n
      {
        if val[1] == 'n'
          val << "+"
          val << "0"
          result = Node.new(:AN_PLUS_B, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  # Zero or more conditions
  hcap_0toN
    : hcap_1toN
    |
    ;
  # One or more conditions (id, class, attribute, pseudo), right-nested
  # into COMBINATOR nodes
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    ;
  attribute_id
    : HASH { result = Node.new(:ID, val) }
    ;
  # Optional "operator value" part of an attribute condition
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, val[1]] }
    | eql_incl_dash STRING { result = [val.first, val[1]] }
    |
    ;
  # Maps an attribute operator token to the symbol stored in the condition
  eql_incl_dash
    : EQUAL           { result = :equal           }
    | PREFIXMATCH     { result = :prefix_match    }
    | SUFFIXMATCH     { result = :suffix_match    }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal       }
    | INCLUDES        { result = :includes        }
    | DASHMATCH       { result = :dash_match      }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : hcap_1toN
    ;
end
|
225
|
+
|
226
|
+
---- header
|
227
|
+
|