actionview 4.1.0.beta1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of actionview might be problematic. Click here for more details.

Files changed (106) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +274 -0
  3. data/MIT-LICENSE +21 -0
  4. data/README.rdoc +34 -0
  5. data/lib/action_view.rb +97 -0
  6. data/lib/action_view/base.rb +205 -0
  7. data/lib/action_view/buffers.rb +49 -0
  8. data/lib/action_view/context.rb +36 -0
  9. data/lib/action_view/dependency_tracker.rb +93 -0
  10. data/lib/action_view/digestor.rb +116 -0
  11. data/lib/action_view/flows.rb +76 -0
  12. data/lib/action_view/helpers.rb +64 -0
  13. data/lib/action_view/helpers/active_model_helper.rb +49 -0
  14. data/lib/action_view/helpers/asset_tag_helper.rb +322 -0
  15. data/lib/action_view/helpers/asset_url_helper.rb +355 -0
  16. data/lib/action_view/helpers/atom_feed_helper.rb +203 -0
  17. data/lib/action_view/helpers/cache_helper.rb +200 -0
  18. data/lib/action_view/helpers/capture_helper.rb +216 -0
  19. data/lib/action_view/helpers/controller_helper.rb +25 -0
  20. data/lib/action_view/helpers/csrf_helper.rb +30 -0
  21. data/lib/action_view/helpers/date_helper.rb +1075 -0
  22. data/lib/action_view/helpers/debug_helper.rb +39 -0
  23. data/lib/action_view/helpers/form_helper.rb +1876 -0
  24. data/lib/action_view/helpers/form_options_helper.rb +843 -0
  25. data/lib/action_view/helpers/form_tag_helper.rb +746 -0
  26. data/lib/action_view/helpers/javascript_helper.rb +75 -0
  27. data/lib/action_view/helpers/number_helper.rb +425 -0
  28. data/lib/action_view/helpers/output_safety_helper.rb +38 -0
  29. data/lib/action_view/helpers/record_tag_helper.rb +108 -0
  30. data/lib/action_view/helpers/rendering_helper.rb +90 -0
  31. data/lib/action_view/helpers/sanitize_helper.rb +256 -0
  32. data/lib/action_view/helpers/tag_helper.rb +176 -0
  33. data/lib/action_view/helpers/tags.rb +41 -0
  34. data/lib/action_view/helpers/tags/base.rb +148 -0
  35. data/lib/action_view/helpers/tags/check_box.rb +64 -0
  36. data/lib/action_view/helpers/tags/checkable.rb +16 -0
  37. data/lib/action_view/helpers/tags/collection_check_boxes.rb +44 -0
  38. data/lib/action_view/helpers/tags/collection_helpers.rb +85 -0
  39. data/lib/action_view/helpers/tags/collection_radio_buttons.rb +36 -0
  40. data/lib/action_view/helpers/tags/collection_select.rb +28 -0
  41. data/lib/action_view/helpers/tags/color_field.rb +25 -0
  42. data/lib/action_view/helpers/tags/date_field.rb +13 -0
  43. data/lib/action_view/helpers/tags/date_select.rb +72 -0
  44. data/lib/action_view/helpers/tags/datetime_field.rb +22 -0
  45. data/lib/action_view/helpers/tags/datetime_local_field.rb +19 -0
  46. data/lib/action_view/helpers/tags/datetime_select.rb +8 -0
  47. data/lib/action_view/helpers/tags/email_field.rb +8 -0
  48. data/lib/action_view/helpers/tags/file_field.rb +8 -0
  49. data/lib/action_view/helpers/tags/grouped_collection_select.rb +29 -0
  50. data/lib/action_view/helpers/tags/hidden_field.rb +8 -0
  51. data/lib/action_view/helpers/tags/label.rb +65 -0
  52. data/lib/action_view/helpers/tags/month_field.rb +13 -0
  53. data/lib/action_view/helpers/tags/number_field.rb +18 -0
  54. data/lib/action_view/helpers/tags/password_field.rb +12 -0
  55. data/lib/action_view/helpers/tags/radio_button.rb +31 -0
  56. data/lib/action_view/helpers/tags/range_field.rb +8 -0
  57. data/lib/action_view/helpers/tags/search_field.rb +24 -0
  58. data/lib/action_view/helpers/tags/select.rb +41 -0
  59. data/lib/action_view/helpers/tags/tel_field.rb +8 -0
  60. data/lib/action_view/helpers/tags/text_area.rb +18 -0
  61. data/lib/action_view/helpers/tags/text_field.rb +29 -0
  62. data/lib/action_view/helpers/tags/time_field.rb +13 -0
  63. data/lib/action_view/helpers/tags/time_select.rb +8 -0
  64. data/lib/action_view/helpers/tags/time_zone_select.rb +20 -0
  65. data/lib/action_view/helpers/tags/url_field.rb +8 -0
  66. data/lib/action_view/helpers/tags/week_field.rb +13 -0
  67. data/lib/action_view/helpers/text_helper.rb +447 -0
  68. data/lib/action_view/helpers/translation_helper.rb +111 -0
  69. data/lib/action_view/helpers/url_helper.rb +625 -0
  70. data/lib/action_view/layouts.rb +426 -0
  71. data/lib/action_view/locale/en.yml +56 -0
  72. data/lib/action_view/log_subscriber.rb +44 -0
  73. data/lib/action_view/lookup_context.rb +249 -0
  74. data/lib/action_view/model_naming.rb +12 -0
  75. data/lib/action_view/path_set.rb +77 -0
  76. data/lib/action_view/railtie.rb +49 -0
  77. data/lib/action_view/record_identifier.rb +84 -0
  78. data/lib/action_view/renderer/abstract_renderer.rb +47 -0
  79. data/lib/action_view/renderer/partial_renderer.rb +492 -0
  80. data/lib/action_view/renderer/renderer.rb +50 -0
  81. data/lib/action_view/renderer/streaming_template_renderer.rb +103 -0
  82. data/lib/action_view/renderer/template_renderer.rb +96 -0
  83. data/lib/action_view/rendering.rb +145 -0
  84. data/lib/action_view/routing_url_for.rb +109 -0
  85. data/lib/action_view/tasks/dependencies.rake +17 -0
  86. data/lib/action_view/template.rb +340 -0
  87. data/lib/action_view/template/error.rb +141 -0
  88. data/lib/action_view/template/handlers.rb +53 -0
  89. data/lib/action_view/template/handlers/builder.rb +26 -0
  90. data/lib/action_view/template/handlers/erb.rb +145 -0
  91. data/lib/action_view/template/handlers/raw.rb +11 -0
  92. data/lib/action_view/template/resolver.rb +329 -0
  93. data/lib/action_view/template/text.rb +34 -0
  94. data/lib/action_view/template/types.rb +57 -0
  95. data/lib/action_view/test_case.rb +272 -0
  96. data/lib/action_view/testing/resolvers.rb +50 -0
  97. data/lib/action_view/vendor/html-scanner.rb +20 -0
  98. data/lib/action_view/vendor/html-scanner/html/document.rb +68 -0
  99. data/lib/action_view/vendor/html-scanner/html/node.rb +532 -0
  100. data/lib/action_view/vendor/html-scanner/html/sanitizer.rb +188 -0
  101. data/lib/action_view/vendor/html-scanner/html/selector.rb +830 -0
  102. data/lib/action_view/vendor/html-scanner/html/tokenizer.rb +107 -0
  103. data/lib/action_view/vendor/html-scanner/html/version.rb +11 -0
  104. data/lib/action_view/version.rb +11 -0
  105. data/lib/action_view/view_paths.rb +96 -0
  106. metadata +218 -0
@@ -0,0 +1,188 @@
1
+ require 'set'
2
+ require 'cgi'
3
+ require 'active_support/core_ext/module/attribute_accessors'
4
+
5
+ module HTML
6
+ class Sanitizer
7
+ def sanitize(text, options = {})
8
+ validate_options(options)
9
+ return text unless sanitizeable?(text)
10
+ tokenize(text, options).join
11
+ end
12
+
13
+ def sanitizeable?(text)
14
+ !(text.nil? || text.empty? || !text.index("<"))
15
+ end
16
+
17
+ protected
18
+ def tokenize(text, options)
19
+ tokenizer = HTML::Tokenizer.new(text)
20
+ result = []
21
+ while token = tokenizer.next
22
+ node = Node.parse(nil, 0, 0, token, false)
23
+ process_node node, result, options
24
+ end
25
+ result
26
+ end
27
+
28
+ def process_node(node, result, options)
29
+ result << node.to_s
30
+ end
31
+
32
+ def validate_options(options)
33
+ if options[:tags] && !options[:tags].is_a?(Enumerable)
34
+ raise ArgumentError, "You should pass :tags as an Enumerable"
35
+ end
36
+
37
+ if options[:attributes] && !options[:attributes].is_a?(Enumerable)
38
+ raise ArgumentError, "You should pass :attributes as an Enumerable"
39
+ end
40
+ end
41
+ end
42
+
43
+ class FullSanitizer < Sanitizer
44
+ def sanitize(text, options = {})
45
+ result = super
46
+ # strip any comments, and if they have a newline at the end (ie. line with
47
+ # only a comment) strip that too
48
+ result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m)
49
+ # Recurse - handle all dirty nested tags
50
+ result == text ? result : sanitize(result, options)
51
+ end
52
+
53
+ def process_node(node, result, options)
54
+ result << node.to_s if node.class == HTML::Text
55
+ end
56
+ end
57
+
58
+ class LinkSanitizer < FullSanitizer
59
+ cattr_accessor :included_tags, :instance_writer => false
60
+ self.included_tags = Set.new(%w(a href))
61
+
62
+ def sanitizeable?(text)
63
+ !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
64
+ end
65
+
66
+ protected
67
+ def process_node(node, result, options)
68
+ result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
69
+ end
70
+ end
71
+
72
+ class WhiteListSanitizer < Sanitizer
73
+ [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
74
+ :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
75
+ class_attribute attr, :instance_writer => false
76
+ end
77
+
78
+ # A regular expression of the valid characters used to separate protocols like
79
+ # the ':' in 'http://foo.com'
80
+ self.protocol_separator = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
81
+
82
+ # Specifies a Set of HTML attributes that can have URIs.
83
+ self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
84
+
85
+ # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
86
+ # to just escaping harmless tags like &lt;font&gt;
87
+ self.bad_tags = Set.new(%w(script))
88
+
89
+ # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
90
+ self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
91
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
92
+ acronym a img blockquote del ins))
93
+
94
+ # Specifies the default Set of html attributes that the #sanitize helper will leave
95
+ # in the allowed tag.
96
+ self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
97
+
98
+ # Specifies the default Set of protocols that the #sanitize helper will accept in URI attributes.
99
+ self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
100
+ feed svn urn aim rsync tag ssh sftp rtsp afs))
101
+
102
+ # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
103
+ self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
104
+ border-color border-left-color border-right-color border-top-color clear color cursor direction display
105
+ elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
106
+ overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
107
+ speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
108
+ width))
109
+
110
+ # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
111
+ self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
112
+ collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
113
+ nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
114
+
115
+ # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
116
+ self.shorthand_css_properties = Set.new(%w(background border margin padding))
117
+
118
+ # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
119
+ def sanitize_css(style)
120
+ # disallow urls
121
+ style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
122
+
123
+ # gauntlet
124
+ if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ ||
125
+ style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
126
+ return ''
127
+ end
128
+
129
+ clean = []
130
+ style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
131
+ if allowed_css_properties.include?(prop.downcase)
132
+ clean << prop + ': ' + val + ';'
133
+ elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
134
+ unless val.split().any? do |keyword|
135
+ !allowed_css_keywords.include?(keyword) &&
136
+ keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
137
+ end
138
+ clean << prop + ': ' + val + ';'
139
+ end
140
+ end
141
+ end
142
+ clean.join(' ')
143
+ end
144
+
145
+ protected
146
+ def tokenize(text, options)
147
+ options[:parent] = []
148
+ options[:attributes] ||= allowed_attributes
149
+ options[:tags] ||= allowed_tags
150
+ super
151
+ end
152
+
153
+ def process_node(node, result, options)
154
+ result << case node
155
+ when HTML::Tag
156
+ if node.closing == :close
157
+ options[:parent].shift
158
+ else
159
+ options[:parent].unshift node.name
160
+ end
161
+
162
+ process_attributes_for node, options
163
+
164
+ options[:tags].include?(node.name) ? node : nil
165
+ else
166
+ bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
167
+ end
168
+ end
169
+
170
+ def process_attributes_for(node, options)
171
+ return unless node.attributes
172
+ node.attributes.keys.each do |attr_name|
173
+ value = node.attributes[attr_name].to_s
174
+
175
+ if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
176
+ node.attributes.delete(attr_name)
177
+ else
178
+ node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value))
179
+ end
180
+ end
181
+ end
182
+
183
+ def contains_bad_protocols?(attr_name, value)
184
+ uri_attributes.include?(attr_name) &&
185
+ (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip))
186
+ end
187
+ end
188
+ end
@@ -0,0 +1,830 @@
1
+ #--
2
+ # Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
3
+ # Under MIT and/or CC By license.
4
+ #++
5
+
6
+ module HTML
7
+
8
+ # Selects HTML elements using CSS 2 selectors.
9
+ #
10
+ # The +Selector+ class uses CSS selector expressions to match and select
11
+ # HTML elements.
12
+ #
13
+ # For example:
14
+ # selector = HTML::Selector.new "form.login[action=/login]"
15
+ # creates a new selector that matches any +form+ element with the class
16
+ # +login+ and an attribute +action+ with the value <tt>/login</tt>.
17
+ #
18
+ # === Matching Elements
19
+ #
20
+ # Use the #match method to determine if an element matches the selector.
21
+ #
22
+ # For simple selectors, the method returns an array with that element,
23
+ # or +nil+ if the element does not match. For complex selectors (see below)
24
+ # the method returns an array with all matched elements, or +nil+ if no
25
+ # match found.
26
+ #
27
+ # For example:
28
+ # if selector.match(element)
29
+ # puts "Element is a login form"
30
+ # end
31
+ #
32
+ # === Selecting Elements
33
+ #
34
+ # Use the #select method to select all matching elements starting with
35
+ # one element and going through all children in depth-first order.
36
+ #
37
+ # This method returns an array of all matching elements, an empty array
38
+ # if no match is found
39
+ #
40
+ # For example:
41
+ # selector = HTML::Selector.new "input[type=text]"
42
+ # matches = selector.select(element)
43
+ # matches.each do |match|
44
+ # puts "Found text field with name #{match.attributes['name']}"
45
+ # end
46
+ #
47
+ # === Expressions
48
+ #
49
+ # Selectors can match elements using any of the following criteria:
50
+ # * <tt>name</tt> -- Match an element based on its name (tag name).
51
+ # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
52
+ # to match any element.
53
+ # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
54
+ # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
55
+ # * <tt>.class</tt> -- Match an element based on its class name, all
56
+ # class names if more than one specified.
57
+ # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
58
+ # * <tt>[attr=value]</tt> -- Match an element that has the specified
59
+ # attribute and value. (More operators are supported; see below.)
60
+ # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
61
+ # such as <tt>:nth-child</tt> and <tt>:empty</tt>.
62
+ # * <tt>:not(expr)</tt> -- Match an element that does not match the
63
+ # negation expression.
64
+ #
65
+ # When using a combination of the above, the element name comes first
66
+ # followed by identifier, class names, attributes, pseudo classes and
67
+ # negation in any order. Do not separate these parts with spaces!
68
+ # Space separation is used for descendant selectors.
69
+ #
70
+ # For example:
71
+ # selector = HTML::Selector.new "form.login[action=/login]"
72
+ # The matched element must be of type +form+ and have the class +login+.
73
+ # It may have other classes, but the class +login+ is required to match.
74
+ # It must also have an attribute called +action+ with the value
75
+ # <tt>/login</tt>.
76
+ #
77
+ # This selector will match the following element:
78
+ # <form class="login form" method="post" action="/login">
79
+ # but will not match the element:
80
+ # <form method="post" action="/logout">
81
+ #
82
+ # === Attribute Values
83
+ #
84
+ # Several operators are supported for matching attributes:
85
+ # * <tt>name</tt> -- The element must have an attribute with that name.
86
+ # * <tt>name=value</tt> -- The element must have an attribute with that
87
+ # name and value.
88
+ # * <tt>name^=value</tt> -- The attribute value must start with the
89
+ # specified value.
90
+ # * <tt>name$=value</tt> -- The attribute value must end with the
91
+ # specified value.
92
+ # * <tt>name*=value</tt> -- The attribute value must contain the
93
+ # specified value.
94
+ # * <tt>name~=word</tt> -- The attribute value must contain the specified
95
+ # word (space separated).
96
+ # * <tt>name|=word</tt> -- The attribute value must start with specified
97
+ # word.
98
+ #
99
+ # For example, the following two selectors match the same element:
100
+ # #my_id
101
+ # [id=my_id]
102
+ # and so do the following two selectors:
103
+ # .my_class
104
+ # [class~=my_class]
105
+ #
106
+ # === Alternatives, siblings, children
107
+ #
108
+ # Complex selectors use a combination of expressions to match elements:
109
+ # * <tt>expr1 expr2</tt> -- Match any element against the second expression
110
+ # if it has some parent element that matches the first expression.
111
+ # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
112
+ # if it is the child of an element that matches the first expression.
113
+ # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
114
+ # if it immediately follows an element that matches the first expression.
115
+ # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
116
+ # that comes after an element that matches the first expression.
117
+ # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
118
+ # or against the second expression.
119
+ #
120
+ # Since children and sibling selectors may match more than one element given
121
+ # the first element, the #match method may return more than one match.
122
+ #
123
+ # === Pseudo classes
124
+ #
125
+ # Pseudo classes were introduced in CSS 3. They are most often used to select
126
+ # elements in a given position:
127
+ # * <tt>:root</tt> -- Match the element only if it is the root element
128
+ # (no parent element).
129
+ # * <tt>:empty</tt> -- Match the element only if it has no child elements,
130
+ # and no text content.
131
+ # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt>
132
+ # as its text content (ignoring leading and trailing whitespace).
133
+ # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
134
+ # of its parent element.
135
+ # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
136
+ # of its parent element of its type.
137
+ # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
138
+ # of its parent element.
139
+ # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
140
+ # of its parent element of its type.
141
+ # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
142
+ # of its parent element.
143
+ # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
144
+ # of its parent element of its type.
145
+ # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
146
+ # of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
147
+ # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
148
+ # in each group of <tt>a</tt> child elements of its parent element.
149
+ # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
150
+ # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
151
+ # elements of its parent element.
152
+ # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
153
+ # Same as <tt>:nth-child(2n+1)</tt>.
154
+ # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
155
+ # fourth). Same as <tt>:nth-child(2n+2)</tt>.
156
+ # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
157
+ # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
158
+ # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
159
+ # only elements of its type.
160
+ # * <tt>:not(selector)</tt> -- Match the element only if the element does not
161
+ # match the simple selector.
162
+ #
163
+ # As you can see, <tt>:nth-child</tt> pseudo class and its variant can get quite
164
+ # tricky and the CSS specification doesn't do a much better job explaining it.
165
+ # But after reading the examples and trying a few combinations, it's easy to
166
+ # figure out.
167
+ #
168
+ # For example:
169
+ # table tr:nth-child(odd)
170
+ # Selects every second row in the table starting with the first one.
171
+ #
172
+ # div p:nth-child(4)
173
+ # Selects the fourth paragraph in the +div+, but not if the +div+ contains
174
+ # other elements, since those are also counted.
175
+ #
176
+ # div p:nth-of-type(4)
177
+ # Selects the fourth paragraph in the +div+, counting only paragraphs, and
178
+ # ignoring all other elements.
179
+ #
180
+ # div p:nth-of-type(-n+4)
181
+ # Selects the first four paragraphs, ignoring all others.
182
+ #
183
+ # And you can always select an element that matches one set of rules but
184
+ # not another using <tt>:not</tt>. For example:
185
+ # p:not(.post)
186
+ # Matches all paragraphs that do not have the class <tt>.post</tt>.
187
+ #
188
+ # === Substitution Values
189
+ #
190
+ # You can use substitution with identifiers, class names and element values.
191
+ # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
192
+ # next value in the argument list following the CSS expression.
193
+ #
194
+ # The substitution value may be a string or a regular expression. All other
195
+ # values are converted to strings.
196
+ #
197
+ # For example:
198
+ # selector = HTML::Selector.new "#?", /^\d+$/
199
+ # matches any element whose identifier consists of one or more digits.
200
+ #
201
+ # See http://www.w3.org/TR/css3-selectors/
202
+ class Selector
203
+
204
+
205
+ # An invalid selector.
206
+ class InvalidSelectorError < StandardError #:nodoc:
207
+ end
208
+
209
+
210
+ class << self
211
+
212
+ # :call-seq:
213
+ # Selector.for_class(cls) => selector
214
+ #
215
+ # Creates a new selector for the given class name.
216
+ def for_class(cls)
217
+ self.new([".?", cls])
218
+ end
219
+
220
+
221
+ # :call-seq:
222
+ # Selector.for_id(id) => selector
223
+ #
224
+ # Creates a new selector for the given id.
225
+ def for_id(id)
226
+ self.new(["#?", id])
227
+ end
228
+
229
+ end
230
+
231
+
232
+ # :call-seq:
233
+ # Selector.new(string, [values ...]) => selector
234
+ #
235
+ # Creates a new selector from a CSS 2 selector expression.
236
+ #
237
+ # The first argument is the selector expression. All other arguments
238
+ # are used for value substitution.
239
+ #
240
+ # Raises ArgumentError if the selector expression is empty or invalid.
241
+ def initialize(selector, *values)
242
+ raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
243
+ @source = ""
244
+ values = values[0] if values.size == 1 && values[0].is_a?(Array)
245
+
246
+ # We need a copy to determine if we failed to parse, and also
247
+ # preserve the original pass by-ref statement.
248
+ statement = selector.strip.dup
249
+
250
+ # Create a simple selector, along with negation.
251
+ simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
252
+
253
+ @alternates = []
254
+ @depends = nil
255
+
256
+ # Alternative selector.
257
+ if statement.sub!(/^\s*,\s*/, "")
258
+ second = Selector.new(statement, values)
259
+ @alternates << second
260
+ # If there are alternate selectors, we group them in the top selector.
261
+ if alternates = second.instance_variable_get(:@alternates)
262
+ second.instance_variable_set(:@alternates, [])
263
+ @alternates.concat alternates
264
+ end
265
+ @source << " , " << second.to_s
266
+ # Adjacent sibling selector: create a dependency into second selector that will
267
+ # match element immediately following this one.
268
+ elsif statement.sub!(/^\s*\+\s*/, "")
269
+ second = next_selector(statement, values)
270
+ @depends = lambda do |element, first|
271
+ if element = next_element(element)
272
+ second.match(element, first)
273
+ end
274
+ end
275
+ @source << " + " << second.to_s
276
+ # General sibling selector: create a dependency into second selector that will
277
+ # match all elements following this one.
278
+ elsif statement.sub!(/^\s*~\s*/, "")
279
+ second = next_selector(statement, values)
280
+ @depends = lambda do |element, first|
281
+ matches = []
282
+ while element = next_element(element)
283
+ if subset = second.match(element, first)
284
+ if first && !subset.empty?
285
+ matches << subset.first
286
+ break
287
+ else
288
+ matches.concat subset
289
+ end
290
+ end
291
+ end
292
+ matches.empty? ? nil : matches
293
+ end
294
+ @source << " ~ " << second.to_s
295
+ # Child selector: create a dependency into second selector that will
296
+ # match a child element of this one.
297
+ elsif statement.sub!(/^\s*>\s*/, "")
298
+ second = next_selector(statement, values)
299
+ @depends = lambda do |element, first|
300
+ matches = []
301
+ element.children.each do |child|
302
+ if child.tag? && subset = second.match(child, first)
303
+ if first && !subset.empty?
304
+ matches << subset.first
305
+ break
306
+ else
307
+ matches.concat subset
308
+ end
309
+ end
310
+ end
311
+ matches.empty? ? nil : matches
312
+ end
313
+ @source << " > " << second.to_s
314
+ # Descendant selector: create a dependency into second selector that
315
+ # will match all descendant elements of this one. Note,
316
+ elsif statement =~ /^\s+\S+/ && statement != selector
317
+ second = next_selector(statement, values)
318
+ @depends = lambda do |element, first|
319
+ matches = []
320
+ stack = element.children.reverse
321
+ while node = stack.pop
322
+ next unless node.tag?
323
+ if subset = second.match(node, first)
324
+ if first && !subset.empty?
325
+ matches << subset.first
326
+ break
327
+ else
328
+ matches.concat subset
329
+ end
330
+ elsif children = node.children
331
+ stack.concat children.reverse
332
+ end
333
+ end
334
+ matches.empty? ? nil : matches
335
+ end
336
+ @source << " " << second.to_s
337
+ else
338
+ # The last selector is where we check that we parsed
339
+ # all the parts.
340
+ unless statement.empty? || statement.strip.empty?
341
+ raise ArgumentError, "Invalid selector: #{statement}"
342
+ end
343
+ end
344
+ end
345
+
346
+
347
+ # :call-seq:
348
+ # match(element, first?) => array or nil
349
+ #
350
+ # Matches an element against the selector.
351
+ #
352
+ # For a simple selector this method returns an array with the
353
+ # element if the element matches, nil otherwise.
354
+ #
355
+ # For a complex selector (sibling and descendant) this method
356
+ # returns an array with all matching elements, nil if no match is
357
+ # found.
358
+ #
359
+ # Use +first_only=true+ if you are only interested in the first element.
360
+ #
361
+ # For example:
362
+ # if selector.match(element)
363
+ # puts "Element is a login form"
364
+ # end
365
+ def match(element, first_only = false)
366
+ # Match element if no element name or element name same as element name
367
+ if matched = (!@tag_name || @tag_name == element.name)
368
+ # No match if one of the attribute matches failed
369
+ for attr in @attributes
370
+ if element.attributes[attr[0]] !~ attr[1]
371
+ matched = false
372
+ break
373
+ end
374
+ end
375
+ end
376
+
377
+ # Pseudo class matches (nth-child, empty, etc).
378
+ if matched
379
+ for pseudo in @pseudo
380
+ unless pseudo.call(element)
381
+ matched = false
382
+ break
383
+ end
384
+ end
385
+ end
386
+
387
+ # Negation. Same rules as above, but we fail if a match is made.
388
+ if matched && @negation
389
+ for negation in @negation
390
+ if negation[:tag_name] == element.name
391
+ matched = false
392
+ else
393
+ for attr in negation[:attributes]
394
+ if element.attributes[attr[0]] =~ attr[1]
395
+ matched = false
396
+ break
397
+ end
398
+ end
399
+ end
400
+ if matched
401
+ for pseudo in negation[:pseudo]
402
+ if pseudo.call(element)
403
+ matched = false
404
+ break
405
+ end
406
+ end
407
+ end
408
+ break unless matched
409
+ end
410
+ end
411
+
412
+ # If element matched but depends on another element (child,
413
+ # sibling, etc), apply the dependent matches instead.
414
+ if matched && @depends
415
+ matches = @depends.call(element, first_only)
416
+ else
417
+ matches = matched ? [element] : nil
418
+ end
419
+
420
+ # If this selector is part of the group, try all the alternative
421
+ # selectors (unless first_only).
422
+ if !first_only || !matches
423
+ @alternates.each do |alternate|
424
+ break if matches && first_only
425
+ if subset = alternate.match(element, first_only)
426
+ if matches
427
+ matches.concat subset
428
+ else
429
+ matches = subset
430
+ end
431
+ end
432
+ end
433
+ end
434
+
435
+ matches
436
+ end
437
+
438
+
439
+ # :call-seq:
440
+ # select(root) => array
441
+ #
442
+ # Selects and returns an array with all matching elements, beginning
443
+ # with one node and traversing through all children depth-first.
444
+ # Returns an empty array if no match is found.
445
+ #
446
+ # The root node may be any element in the document, or the document
447
+ # itself.
448
+ #
449
+ # For example:
450
+ # selector = HTML::Selector.new "input[type=text]"
451
+ # matches = selector.select(element)
452
+ # matches.each do |match|
453
+ # puts "Found text field with name #{match.attributes['name']}"
454
+ # end
455
+ def select(root)
456
+ matches = []
457
+ stack = [root]
458
+ while node = stack.pop
459
+ if node.tag? && subset = match(node, false)
460
+ subset.each do |match|
461
+ matches << match unless matches.any? { |item| item.equal?(match) }
462
+ end
463
+ elsif children = node.children
464
+ stack.concat children.reverse
465
+ end
466
+ end
467
+ matches
468
+ end
469
+
470
+
471
+ # Similar to #select but returns the first matching element. Returns +nil+
472
+ # if no element matches the selector.
473
def select_first(root)
  # Same traversal as #select, but asks #match for a single result and
  # stops at the first non-empty one. A node whose match result is an
  # empty array is neither returned nor descended into.
  pending = [root]
  while (node = pending.pop)
    subset = node.tag? && match(node, true)
    if subset
      return subset.first unless subset.empty?
    elsif (children = node.children)
      # Pushed reversed so that popping visits the children left to right.
      pending.concat(children.reverse)
    end
  end
  nil
end
484
+
485
+
486
def to_s #:nodoc:
  # The selector text as accumulated in @source.
  @source
end
489
+
490
+
491
+ # Return the next element after this one. Skips sibling text nodes.
492
+ #
493
+ # With the +name+ argument, returns the next element with that name,
494
+ # skipping other sibling elements.
495
def next_element(element, name = nil)
  # Returns the first tag node that follows +element+ among its parent's
  # children, skipping non-tag siblings. When +name+ is given, only a
  # following tag with that name qualifies.
  #
  # Returns +nil+ when there is no such sibling, and also when +element+
  # has no parent or the parent has no children (previously a missing
  # parent raised NoMethodError).
  parent = element.parent
  siblings = parent && parent.children
  return nil unless siblings

  # Locate the element itself by identity, then scan what comes after it.
  position = siblings.index { |node| node.equal?(element) }
  return nil unless position

  siblings.drop(position + 1).find do |node|
    node.tag? && (name.nil? || node.name == name)
  end
end
508
+
509
+
510
+ protected
511
+
512
+
513
+ # Creates a simple selector given the statement and array of
514
+ # substitution values.
515
+ #
516
+ # Returns a hash with the values +tag_name+, +attributes+,
517
+ # +pseudo+ (classes) and +negation+.
518
+ #
519
+ # Called the first time with +can_negate+ true to allow
520
+ # negation. Called a second time with false since negation
521
+ # cannot be negated.
522
def simple_selector(statement, values, can_negate = true)
  # Parses one simple selector from the front of +statement+. The
  # recognised text is removed from +statement+ in place and a normalised
  # copy of it is appended to @source. Each "?" placeholder is filled by
  # shifting the next entry off +values+.
  #
  # Returns a hash with the keys :tag_name, :attributes, :pseudo and
  # :negation. Raises ArgumentError for a :not( nested inside another
  # :not(, for a :not( that is never closed, and for an nth-child
  # expression that cannot be interpreted.
  tag_name = nil
  attributes = []
  pseudo = []
  negation = []

  # Element name. (In a negation this could appear in any position, but
  # for simplicity it is only recognised when it comes first.)
  statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
    match.strip!
    tag_name = match.downcase unless match == "*"
    @source << match
    "" # Remove
  end

  # Get identifier, class, attribute name, pseudo or negation. Every
  # successful sub! consumes a piece and restarts the loop; the loop ends
  # once nothing at the front of the statement is recognised.
  while true
    # Element identifier.
    next if statement.sub!(/^#(\?|[\w\-]+)/) do
      id = $1
      id = values.shift if id == "?"
      @source << "##{id}"
      id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
      attributes << ["id", id]
      "" # Remove
    end

    # Class name.
    next if statement.sub!(/^\.([\w\-]+)/) do
      class_name = $1
      @source << ".#{class_name}"
      # "\\s" keeps the backslash so the regexp sees the whitespace class;
      # a bare "\s" inside a double-quoted string is just a space and
      # would miss class tokens separated by tabs or newlines.
      attributes << ["class", Regexp.new("(^|\\s)#{Regexp.escape(class_name)}($|\\s)")]
      "" # Remove
    end

    # Attribute value. Quoted values may contain "]"; an unquoted value
    # runs up to the closing bracket.
    next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^"]*"|[^\]]*)\s*\]/) do
      name, equality, value = $1, $2, $3
      if value == "?"
        value = values.shift
      else
        # Handle single and double quotes.
        value.strip!
        quoted = (value[0] == '"' || value[0] == "'") && value[0] == value[-1]
        value = value[1..-2] if quoted
      end
      @source << "[#{name}#{equality}'#{value}']"
      attributes << [name.downcase.strip, attribute_match(equality, value)]
      "" # Remove
    end

    # Root element only.
    next if statement.sub!(/^:root/) do
      pseudo << lambda do |element|
        element.parent.nil? || !element.parent.tag?
      end
      @source << ":root"
      "" # Remove
    end

    # Nth-child including last and of-type.
    next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
      reverse = $1 == "last-"
      of_type = $2 == "of-type"
      @source << ":nth-#{$1}#{$2}("
      # Each regexp +when+ below re-matches the argument, so inside a
      # branch $1/$2 refer to that branch's own captures.
      case $3
      when "odd"
        pseudo << nth_child(2, 1, of_type, reverse)
        @source << "odd)"
      when "even"
        pseudo << nth_child(2, 2, of_type, reverse)
        @source << "even)"
      when /^(\d+|\?)$/ # b only
        b = ($1 == "?" ? values.shift : $1).to_i
        pseudo << nth_child(0, b, of_type, reverse)
        @source << "#{b})"
      when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
        a = ($1 == "?" ? values.shift :
             $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
        b = ($2 == "?" ? values.shift : $2).to_i
        pseudo << nth_child(a, b, of_type, reverse)
        @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
      else
        raise ArgumentError, "Invalid nth-child #{match}"
      end
      "" # Remove
    end

    # First/last child (of type).
    next if statement.sub!(/^:(first|last)-(child|of-type)/) do
      reverse = $1 == "last"
      of_type = $2 == "of-type"
      pseudo << nth_child(0, 1, of_type, reverse)
      @source << ":#{$1}-#{$2}"
      "" # Remove
    end

    # Only child (of type).
    next if statement.sub!(/^:only-(child|of-type)/) do
      of_type = $1 == "of-type"
      pseudo << only_child(of_type)
      @source << ":only-#{$1}"
      "" # Remove
    end

    # Empty: no child elements or meaningful content (whitespace is
    # ignored).
    next if statement.sub!(/^:empty/) do
      pseudo << lambda do |element|
        element.children.none? do |child|
          child.tag? || !child.content.strip.empty?
        end
      end
      @source << ":empty"
      "" # Remove
    end

    # Content: match the text content of the element, stripping
    # leading and trailing spaces.
    next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do
      content = $1
      if content == "?"
        content = values.shift
      elsif (content[0] == '"' || content[0] == "'") && content[0] == content[-1]
        content = content[1..-2]
      end
      @source << ":content('#{content}')"
      content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
      pseudo << lambda do |element|
        # Only the element's direct non-tag children contribute text.
        text = ""
        element.children.each do |child|
          text << child.content unless child.tag?
        end
        text.strip =~ content
      end
      "" # Remove
    end

    # Negation. Create another simple selector to handle it; the nested
    # call is made with can_negate = false so it cannot negate again.
    if statement.sub!(/^:not\(\s*/, "")
      raise ArgumentError, "Double negatives are not missing feature" unless can_negate
      @source << ":not("
      negation << simple_selector(statement, values, false)
      raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
      @source << ")"
      next
    end

    # No match: moving on.
    break
  end

  # Return hash. The keys are mapped to instance variables.
  { :tag_name => tag_name, :attributes => attributes, :pseudo => pseudo, :negation => negation }
end
685
+
686
+
687
+ # Create a regular expression to match an attribute value based
688
+ # on the equality operator (=, ^=, |=, etc).
689
def attribute_match(equality, value)
  # Builds the regular expression used to test an attribute value for the
  # given equality operator (=, ~=, ^=, $=, *=, |=). +value+ may be a
  # Regexp, which is embedded as-is; anything else is converted to a
  # string and escaped. With no operator the attribute only has to exist,
  # in which case +value+ must be empty.
  #
  # The patterns are assembled from double-quoted strings, so whitespace
  # is spelled "\\s": a bare "\s" there is a literal space and would not
  # match values separated by tabs or newlines.
  #
  # NOTE(review): ^ and $ are line anchors in Ruby, so a multi-line
  # attribute value can match on any one of its lines.
  regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
  case equality
  when "="
    # Match the attribute value in full
    Regexp.new("^#{regexp}$")
  when "~="
    # Match a whitespace-separated word within the attribute value
    Regexp.new("(^|\\s)#{regexp}($|\\s)")
  when "^="
    # Match the beginning of the attribute value
    Regexp.new("^#{regexp}")
  when "$="
    # Match the end of the attribute value
    Regexp.new("#{regexp}$")
  when "*="
    # Match substring of the attribute value
    regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
  when "|="
    # Match the first whitespace-separated item of the attribute value
    Regexp.new("^#{regexp}($|\\s)")
  else
    # NOTE(review): InvalidSelectorError is not defined in the visible
    # part of this file; confirm it exists, otherwise this is a NameError.
    raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
    # Match all attributes values (existence check)
    //
  end
end
716
+
717
+
718
+ # Returns a lambda that can match an element against the nth-child
719
+ # pseudo class, given the following arguments:
720
+ # * +a+ -- Value of a part.
721
+ # * +b+ -- Value of b part.
722
+ # * +of_type+ -- True to test only elements of this type (of-type).
723
+ # * +reverse+ -- True to count in reverse order (last-).
724
def nth_child(a, b, of_type, reverse)
  # Builds the predicate behind the nth-child family of pseudo classes.
  # The returned lambda takes an element and answers whether its position
  # among its parent's tag children satisfies the a/b pair; +of_type+
  # restricts the count to siblings with the element's own name, and
  # +reverse+ counts from the last sibling instead of the first.

  # a = 0 with b = 0 selects nothing, and a < 0 with b < 0 can never
  # line up with an index.
  return lambda { |element| false } if (a == 0 && b == 0) || (a < 0 && b < 0)

  b = a + b + 1 if b < 0 # b < 0 just picks last element from each group
  b -= 1 unless b == 0   # b == 0 is same as b == 1, otherwise zero based

  lambda do |element|
    # Element must be inside parent element.
    parent = element.parent
    return false unless parent && parent.tag?

    # Get siblings, reverse if counting from last.
    siblings = parent.children
    siblings = siblings.reverse if reverse

    # Match element name if of-type, otherwise ignore name. Text nodes
    # and comments never count.
    name = of_type ? element.name : nil
    counted = siblings.select do |child|
      child.tag? && (name.nil? || child.name == name)
    end

    if a == 0
      # A fixed position: the element must be the one sitting at index b.
      b >= 0 && counted[b].equal?(element)
    else
      position = counted.index { |child| child.equal?(element) }
      if position.nil?
        false
      elsif a < 0
        # Only the first b + 1 counted siblings are eligible.
        position <= b && (position % a) == 0
      else
        (position % a) == b
      end
    end
  end
end
770
+
771
+
772
+ # Creates an only-child lambda. Pass +of_type+ as true to only look at
773
+ # elements of its type.
774
def only_child(of_type)
  # Builds the predicate behind :only-child / :only-of-type. The returned
  # lambda answers true when the element's parent holds no other tag
  # child (or, with +of_type+, no other tag child of the same name).
  lambda do |element|
    # Element must be inside parent element.
    parent = element.parent
    return false unless parent && parent.tag?

    name = of_type ? element.name : nil
    # Text nodes and comments are skipped; any other qualifying tag
    # disqualifies the element.
    parent.children.none? do |child|
      child.tag? && (name.nil? || child.name == name) && !child.equal?(element)
    end
  end
end
792
+
793
+
794
+ # Called to create a dependent selector (sibling, descendant, etc).
795
+ # Passes the remainder of the statement that will be reduced to zero
796
+ # eventually, and array of substitution values.
797
+ #
798
+ # This method is called from four places, so it helps to put it here
799
+ # for reuse. The only logic deals with the need to detect comma
800
+ # separators (alternate) and apply them to the selector group of the
801
+ # top selector.
802
def next_selector(statement, values)
  # Builds the dependent selector from the rest of the statement. Any
  # alternates the new selector collected are taken away from it and
  # appended to this selector's own @alternates, so the whole group lives
  # on the top selector.
  Selector.new(statement, values).tap do |second|
    alternates = second.instance_variable_get(:@alternates)
    if alternates
      second.instance_variable_set(:@alternates, [])
      @alternates.concat(alternates)
    end
  end
end
811
+
812
+ end
813
+
814
+
815
+ # See HTML::Selector.new
816
def self.selector(statement, *values)
  # Shorthand factory: forwards the statement and the splatted
  # substitution values to Selector.new.
  Selector.new(statement, *values)
end
819
+
820
+
821
class Tag

  # Builds an HTML::Selector from +selector+ and the substitution
  # +values+ (passed along as one array) and runs it with this tag as
  # the root.
  def select(selector, *values)
    HTML::Selector.new(selector, values).select(self)
  end

end
829
+
830
+ end