tdreyno-staticmatic 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/LICENSE +21 -0
  2. data/Rakefile +12 -0
  3. data/bin/staticmatic +12 -0
  4. data/lib/staticmatic/actionpack_support/mime.rb +5 -0
  5. data/lib/staticmatic/actionpack_support/remove_partial_benchmark.rb +6 -0
  6. data/lib/staticmatic/autoload.rb +18 -0
  7. data/lib/staticmatic/base.rb +171 -0
  8. data/lib/staticmatic/builder.rb +102 -0
  9. data/lib/staticmatic/config.rb +47 -0
  10. data/lib/staticmatic/creator.rb +18 -0
  11. data/lib/staticmatic/deprecation.rb +26 -0
  12. data/lib/staticmatic/helpers/asset_tag_helper.rb +37 -0
  13. data/lib/staticmatic/helpers/deprecated_helpers.rb +48 -0
  14. data/lib/staticmatic/helpers/page_helper.rb +9 -0
  15. data/lib/staticmatic/helpers/url_helper.rb +19 -0
  16. data/lib/staticmatic/previewer.rb +65 -0
  17. data/lib/staticmatic/rescue.rb +14 -0
  18. data/lib/staticmatic/template_handlers/haml.rb +19 -0
  19. data/lib/staticmatic/template_handlers/liquid.rb +13 -0
  20. data/lib/staticmatic/template_handlers/markdown.rb +13 -0
  21. data/lib/staticmatic/template_handlers/sass.rb +13 -0
  22. data/lib/staticmatic/template_handlers/textile.rb +13 -0
  23. data/lib/staticmatic/templates/default/Rakefile +3 -0
  24. data/lib/staticmatic/templates/default/config.rb +3 -0
  25. data/lib/staticmatic/templates/default/src/helpers/site_helper.rb +5 -0
  26. data/lib/staticmatic/templates/default/src/layouts/site.html.haml +6 -0
  27. data/lib/staticmatic/templates/default/src/pages/index.html.haml +1 -0
  28. data/lib/staticmatic/templates/default/src/stylesheets/site.css.sass +3 -0
  29. data/lib/staticmatic/templates/rescues/default_error.html.erb +2 -0
  30. data/lib/staticmatic/templates/rescues/template_error.html.erb +19 -0
  31. data/lib/staticmatic.rb +28 -0
  32. data/lib/tasks/staticmatic.rb +9 -0
  33. data/staticmatic.gemspec +53 -0
  34. data/vendor/html-scanner/html/document.rb +68 -0
  35. data/vendor/html-scanner/html/node.rb +530 -0
  36. data/vendor/html-scanner/html/sanitizer.rb +173 -0
  37. data/vendor/html-scanner/html/selector.rb +828 -0
  38. data/vendor/html-scanner/html/tokenizer.rb +105 -0
  39. data/vendor/html-scanner/html/version.rb +11 -0
  40. metadata +127 -0
@@ -0,0 +1,828 @@
1
+ #--
2
+ # Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
3
+ # Under MIT and/or CC By license.
4
+ #++
5
+
6
+ module HTML
7
+
8
+ # Selects HTML elements using CSS 2 selectors.
9
+ #
10
+ # The +Selector+ class uses CSS selector expressions to match and select
11
+ # HTML elements.
12
+ #
13
+ # For example:
14
+ # selector = HTML::Selector.new "form.login[action=/login]"
15
+ # creates a new selector that matches any +form+ element with the class
16
+ # +login+ and an attribute +action+ with the value <tt>/login</tt>.
17
+ #
18
+ # === Matching Elements
19
+ #
20
+ # Use the #match method to determine if an element matches the selector.
21
+ #
22
+ # For simple selectors, the method returns an array with that element,
23
+ # or +nil+ if the element does not match. For complex selectors (see below)
24
+ # the method returns an array with all matched elements, or +nil+ if no
25
+ # match found.
26
+ #
27
+ # For example:
28
+ # if selector.match(element)
29
+ # puts "Element is a login form"
30
+ # end
31
+ #
32
+ # === Selecting Elements
33
+ #
34
+ # Use the #select method to select all matching elements starting with
35
+ # one element and going through all children in depth-first order.
36
+ #
37
+ # This method returns an array of all matching elements, an empty array
38
+ # if no match is found
39
+ #
40
+ # For example:
41
+ # selector = HTML::Selector.new "input[type=text]"
42
+ # matches = selector.select(element)
43
+ # matches.each do |match|
44
+ # puts "Found text field with name #{match.attributes['name']}"
45
+ # end
46
+ #
47
+ # === Expressions
48
+ #
49
+ # Selectors can match elements using any of the following criteria:
50
+ # * <tt>name</tt> -- Match an element based on its name (tag name).
51
+ # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
52
+ # to match any element.
53
+ # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
54
+ # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
55
+ # * <tt>.class</tt> -- Match an element based on its class name, all
56
+ # class names if more than one specified.
57
+ # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
58
+ # * <tt>[attr=value]</tt> -- Match an element that has the specified
59
+ # attribute and value. (More operators are supported see below)
60
+ # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
61
+ # such as <tt>:nth-child</tt> and <tt>:empty</tt>.
62
+ # * <tt>:not(expr)</tt> -- Match an element that does not match the
63
+ # negation expression.
64
+ #
65
+ # When using a combination of the above, the element name comes first
66
+ # followed by identifier, class names, attributes, pseudo classes and
67
+ # negation in any order. Do not separate these parts with spaces!
68
+ # Space separation is used for descendant selectors.
69
+ #
70
+ # For example:
71
+ # selector = HTML::Selector.new "form.login[action=/login]"
72
+ # The matched element must be of type +form+ and have the class +login+.
73
+ # It may have other classes, but the class +login+ is required to match.
74
+ # It must also have an attribute called +action+ with the value
75
+ # <tt>/login</tt>.
76
+ #
77
+ # This selector will match the following element:
78
+ # <form class="login form" method="post" action="/login">
79
+ # but will not match the element:
80
+ # <form method="post" action="/logout">
81
+ #
82
+ # === Attribute Values
83
+ #
84
+ # Several operators are supported for matching attributes:
85
+ # * <tt>name</tt> -- The element must have an attribute with that name.
86
+ # * <tt>name=value</tt> -- The element must have an attribute with that
87
+ # name and value.
88
+ # * <tt>name^=value</tt> -- The attribute value must start with the
89
+ # specified value.
90
+ # * <tt>name$=value</tt> -- The attribute value must end with the
91
+ # specified value.
92
+ # * <tt>name*=value</tt> -- The attribute value must contain the
93
+ # specified value.
94
+ # * <tt>name~=word</tt> -- The attribute value must contain the specified
95
+ # word (space separated).
96
+ # * <tt>name|=word</tt> -- The attribute value must start with specified
97
+ # word.
98
+ #
99
+ # For example, the following two selectors match the same element:
100
+ # #my_id
101
+ # [id=my_id]
102
+ # and so do the following two selectors:
103
+ # .my_class
104
+ # [class~=my_class]
105
+ #
106
+ # === Alternatives, siblings, children
107
+ #
108
+ # Complex selectors use a combination of expressions to match elements:
109
+ # * <tt>expr1 expr2</tt> -- Match any element against the second expression
110
+ # if it has some parent element that matches the first expression.
111
+ # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
112
+ # if it is the child of an element that matches the first expression.
113
+ # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
114
+ # if it immediately follows an element that matches the first expression.
115
+ # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
116
+ # that comes after an element that matches the first expression.
117
+ # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
118
+ # or against the second expression.
119
+ #
120
+ # Since children and sibling selectors may match more than one element given
121
+ # the first element, the #match method may return more than one match.
122
+ #
123
+ # === Pseudo classes
124
+ #
125
+ # Pseudo classes were introduced in CSS 3. They are most often used to select
126
+ # elements in a given position:
127
+ # * <tt>:root</tt> -- Match the element only if it is the root element
128
+ # (no parent element).
129
+ # * <tt>:empty</tt> -- Match the element only if it has no child elements,
130
+ # and no text content.
131
+ # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
132
+ # of its parent element.
133
+ # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
134
+ # of its parent element and its type.
135
+ # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
136
+ # of its parent element.
137
+ # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
138
+ # of its parent element of its type.
139
+ # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
140
+ # of its parent element.
141
+ # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
142
+ # of its parent element of its type.
143
+ # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
144
+ # of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
145
+ # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
146
+ # in each group of <tt>a</tt> child elements of its parent element.
147
+ # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
148
+ # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
149
+ # elements of its parent element.
150
+ # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
151
+ # Same as <tt>:nth-child(2n+1)</tt>.
152
+ # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
153
+ # fourth). Same as <tt>:nth-child(2n+2)</tt>.
154
+ # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
155
+ # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
156
+ # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
157
+ # only elements of its type.
158
+ # * <tt>:not(selector)</tt> -- Match the element only if the element does not
159
+ # match the simple selector.
160
+ #
161
+ # As you can see, the <tt>:nth-child</tt> pseudo class and its variants can get quite
162
+ # tricky and the CSS specification doesn't do a much better job explaining it.
163
+ # But after reading the examples and trying a few combinations, it's easy to
164
+ # figure out.
165
+ #
166
+ # For example:
167
+ # table tr:nth-child(odd)
168
+ # Selects every second row in the table starting with the first one.
169
+ #
170
+ # div p:nth-child(4)
171
+ # Selects the fourth paragraph in the +div+, but not if the +div+ contains
172
+ # other elements, since those are also counted.
173
+ #
174
+ # div p:nth-of-type(4)
175
+ # Selects the fourth paragraph in the +div+, counting only paragraphs, and
176
+ # ignoring all other elements.
177
+ #
178
+ # div p:nth-of-type(-n+4)
179
+ # Selects the first four paragraphs, ignoring all others.
180
+ #
181
+ # And you can always select an element that matches one set of rules but
182
+ # not another using <tt>:not</tt>. For example:
183
+ # p:not(.post)
184
+ # Matches all paragraphs that do not have the class <tt>.post</tt>.
185
+ #
186
+ # === Substitution Values
187
+ #
188
+ # You can use substitution with identifiers, class names and element values.
189
+ # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
190
+ # next value in the argument list following the CSS expression.
191
+ #
192
+ # The substitution value may be a string or a regular expression. All other
193
+ # values are converted to strings.
194
+ #
195
+ # For example:
196
+ # selector = HTML::Selector.new "#?", /^\d+$/
197
+ # matches any element whose identifier consists of one or more digits.
198
+ #
199
+ # See http://www.w3.org/TR/css3-selectors/
200
+ class Selector
201
+
202
+
203
+ # An invalid selector.
204
+ class InvalidSelectorError < StandardError #:nodoc:
205
+ end
206
+
207
+
208
+ class << self
209
+
210
+ # :call-seq:
211
+ # Selector.for_class(cls) => selector
212
+ #
213
+ # Creates a new selector for the given class name.
214
def for_class(cls)
  # The expression and its substitution value must be separate arguments:
  # the constructor calls String methods (strip) on its first argument, so
  # handing it one Array raised NoMethodError.
  #
  # The attribute form is used because its value slot accepts a "?"
  # substitution (a string or a regular expression); it matches a
  # space-separated word of the class attribute, like ".name" does.
  new("[class~=?]", cls)
end
217
+
218
+
219
+ # :call-seq:
220
+ # Selector.for_id(id) => selector
221
+ #
222
+ # Creates a new selector for the given id.
223
def for_id(id)
  # The expression and its substitution value must be separate arguments:
  # the constructor calls String methods (strip) on its first argument, so
  # handing it one Array raised NoMethodError. "#?" takes the next
  # substitution value as the identifier.
  new("#?", id)
end
226
+
227
+ end
228
+
229
+
230
+ # :call-seq:
231
+ # Selector.new(string, [values ...]) => selector
232
+ #
233
+ # Creates a new selector from a CSS 2 selector expression.
234
+ #
235
+ # The first argument is the selector expression. All other arguments
236
+ # are used for value substitution.
237
+ #
238
+ # Raises ArgumentError (or InvalidSelectorError for a malformed attribute test) if the selector expression is invalid.
239
def initialize(selector, *values)
  raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
  # @source accumulates the textual form of the selector as it is parsed;
  # it is what #to_s returns.
  @source = ""
  # Accept substitution values either as a splat or as one array.
  values = values[0] if values.size == 1 && values[0].is_a?(Array)

  # We need a copy to determine if we failed to parse, and also
  # preserve the original pass by-ref statement. The copy is consumed
  # destructively (sub!) as each part is recognized.
  statement = selector.strip.dup

  # Create a simple selector, along with negation. Each key of the
  # returned hash (tag_name, attributes, pseudo, negation) becomes an
  # instance variable of the same name.
  simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }

  @alternates = []
  @depends = nil

  # Alternative selector (expr1, expr2): parse the remainder as a
  # separate selector and keep it in @alternates.
  if statement.sub!(/^\s*,\s*/, "")
    second = Selector.new(statement, values)
    @alternates << second
    # If there are alternate selectors, we group them in the top selector.
    if alternates = second.instance_variable_get(:@alternates)
      second.instance_variable_set(:@alternates, [])
      @alternates.concat alternates
    end
    @source << " , " << second.to_s
  # Adjacent sibling combinator (+): create a dependency into second
  # selector that will match the element immediately following this one.
  elsif statement.sub!(/^\s*\+\s*/, "")
    second = next_selector(statement, values)
    @depends = lambda do |element, first|
      if element = next_element(element)
        second.match(element, first)
      end
    end
    @source << " + " << second.to_s
  # General sibling combinator (~): create a dependency into second
  # selector that will match all elements following this one.
  elsif statement.sub!(/^\s*~\s*/, "")
    second = next_selector(statement, values)
    @depends = lambda do |element, first|
      matches = []
      while element = next_element(element)
        if subset = second.match(element, first)
          # When only the first match is wanted, stop at the first
          # non-empty result.
          if first && !subset.empty?
            matches << subset.first
            break
          else
            matches.concat subset
          end
        end
      end
      matches.empty? ? nil : matches
    end
    @source << " ~ " << second.to_s
  # Child selector (>): create a dependency into second selector that
  # will match a child element of this one.
  elsif statement.sub!(/^\s*>\s*/, "")
    second = next_selector(statement, values)
    @depends = lambda do |element, first|
      matches = []
      element.children.each do |child|
        if child.tag? && subset = second.match(child, first)
          if first && !subset.empty?
            matches << subset.first
            break
          else
            matches.concat subset
          end
        end
      end
      matches.empty? ? nil : matches
    end
    @source << " > " << second.to_s
  # Descendant selector (whitespace): create a dependency into second
  # selector that will match all descendant elements of this one.
  # NOTE(review): the "statement != selector" guard looks intended to
  # reject the case where nothing was consumed from the expression, so
  # the recursion cannot loop on the same text -- confirm.
  elsif statement =~ /^\s+\S+/ && statement != selector
    second = next_selector(statement, values)
    @depends = lambda do |element, first|
      matches = []
      # Depth-first walk using an explicit stack; children are pushed
      # reversed so they are popped in document order.
      stack = element.children.reverse
      while node = stack.pop
        next unless node.tag?
        if subset = second.match(node, first)
          if first && !subset.empty?
            matches << subset.first
            break
          else
            matches.concat subset
          end
        elsif children = node.children
          # Only nodes that did not match are descended into.
          stack.concat children.reverse
        end
      end
      matches.empty? ? nil : matches
    end
    @source << " " << second.to_s
  else
    # The last selector is where we check that we parsed
    # all the parts: anything but whitespace left over is an error.
    unless statement.empty? || statement.strip.empty?
      raise ArgumentError, "Invalid selector: #{statement}"
    end
  end
end
343
+
344
+
345
+ # :call-seq:
346
+ # match(element, first?) => array or nil
347
+ #
348
+ # Matches an element against the selector.
349
+ #
350
+ # For a simple selector this method returns an array with the
351
+ # element if the element matches, nil otherwise.
352
+ #
353
+ # For a complex selector (sibling and descendant) this method
354
+ # returns an array with all matching elements, nil if no match is
355
+ # found.
356
+ #
357
+ # Use +first_only=true+ if you are only interested in the first element.
358
+ #
359
+ # For example:
360
+ # if selector.match(element)
361
+ # puts "Element is a login form"
362
+ # end
363
def match(element, first_only = false)
  # Tag name: no name restriction, or the names are equal.
  matched = !@tag_name || @tag_name == element.name

  # Every attribute test must succeed.
  matched &&= @attributes.all? do |attr_name, pattern|
    element.attributes[attr_name] =~ pattern
  end

  # Every pseudo class (nth-child, empty, etc) must accept the element.
  matched &&= @pseudo.all? { |check| check.call(element) }

  # Negation: the element is rejected as soon as one negated selector
  # matches by tag name, by any attribute, or by any pseudo class.
  if matched && @negation
    rejected = @negation.any? do |negation|
      negation[:tag_name] == element.name ||
        negation[:attributes].any? { |attr_name, pattern| element.attributes[attr_name] =~ pattern } ||
        negation[:pseudo].any? { |check| check.call(element) }
    end
    matched = !rejected
  end

  # A dependent selector (child, sibling, etc) replaces the element
  # itself with whatever the dependency matches.
  matches =
    if matched && @depends
      @depends.call(element, first_only)
    elsif matched
      [element]
    end

  # Alternatives of a selector group are tried too, except when only the
  # first match is wanted and one was already found.
  unless first_only && matches
    @alternates.each do |alternate|
      break if first_only && matches
      subset = alternate.match(element, first_only)
      next unless subset
      if matches
        matches.concat(subset)
      else
        matches = subset
      end
    end
  end

  matches
end
435
+
436
+
437
+ # :call-seq:
438
+ # select(root) => array
439
+ #
440
+ # Selects and returns an array with all matching elements, beginning
441
+ # with one node and traversing through all children depth-first.
442
+ # Returns an empty array if no match is found.
443
+ #
444
+ # The root node may be any element in the document, or the document
445
+ # itself.
446
+ #
447
+ # For example:
448
+ # selector = HTML::Selector.new "input[type=text]"
449
+ # matches = selector.select(element)
450
+ # matches.each do |match|
451
+ # puts "Found text field with name #{match.attributes['name']}"
452
+ # end
453
def select(root)
  found = []
  pending = [root]
  while (node = pending.pop)
    subset = node.tag? && match(node, false)
    if subset
      # Keep each matched element once, compared by object identity.
      subset.each do |candidate|
        found << candidate if found.none? { |seen| seen.equal?(candidate) }
      end
    else
      # Only nodes that did not match are descended into; children are
      # pushed reversed so they are popped in document order.
      children = node.children
      pending.concat(children.reverse) if children
    end
  end
  found
end
467
+
468
+
469
+ # Similar to #select but returns the first matching element. Returns +nil+
470
+ # if no element matches the selector.
471
def select_first(root)
  pending = [root]
  while (node = pending.pop)
    subset = node.tag? && match(node, true)
    if subset
      # An empty result is not a match, but the node is not descended
      # into either.
      return subset.first unless subset.empty?
    else
      # Children are pushed reversed so they are popped in document order.
      children = node.children
      pending.concat(children.reverse) if children
    end
  end
  nil
end
482
+
483
+
484
def to_s #:nodoc:
  # @source is the selector text rebuilt piece by piece while the
  # expression was parsed.
  @source
end
487
+
488
+
489
+ # Return the next element after this one. Skips sibling text nodes.
490
+ #
491
+ # With the +name+ argument, returns the next element with that name,
492
+ # skipping other sibling elements.
493
def next_element(element, name = nil)
  # An element without a parent has no siblings, so there is no next
  # element (previously this raised NoMethodError on nil).
  parent = element.parent
  siblings = parent && parent.children
  return nil unless siblings

  # Walk the siblings in order; once the element itself has been passed,
  # the first tag node (optionally restricted to +name+) is the answer.
  passed_self = false
  siblings.each do |node|
    if node.equal?(element)
      passed_self = true
    elsif passed_self && node.tag? && (name.nil? || node.name == name)
      return node
    end
  end
  nil
end
506
+
507
+
508
+ protected
509
+
510
+
511
+ # Creates a simple selector given the statement and array of
512
+ # substitution values.
513
+ #
514
+ # Returns a hash with the values +tag_name+, +attributes+,
515
+ # +pseudo+ (classes) and +negation+.
516
+ #
517
+ # Called the first time with +can_negate+ true to allow
518
+ # negation. Called a second time with false since negation
519
+ # cannot be negated.
520
def simple_selector(statement, values, can_negate = true)
  # Consumes the leading simple selector from +statement+ (destructively),
  # taking "?" substitutions from the front of +values+ and appending the
  # recognized text to @source.
  tag_name = nil
  attributes = []
  pseudo = []
  negation = []

  # Element name. (Note that in negation, this can come at
  # any order, but for simplicity we allow if only first).
  statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
    match.strip!
    tag_name = match.downcase unless match == "*"
    @source << match
    "" # Remove
  end

  # Get identifier, class, attribute name, pseudo or negation. Each
  # recognized part is removed from the statement and the loop restarts;
  # the loop ends when nothing matches any more.
  while true
    # Element identifier.
    next if statement.sub!(/^#(\?|[\w\-]+)/) do |match|
      id = $1
      if id == "?"
        id = values.shift
      end
      @source << "##{id}"
      id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
      attributes << ["id", id]
      "" # Remove
    end

    # Class name. ".?" takes the class from the substitution values
    # (string or regular expression), like "#?" does for identifiers.
    next if statement.sub!(/^\.(\?|[\w\-]+)/) do |match|
      class_name = $1
      if class_name == "?"
        class_name = values.shift
      end
      @source << ".#{class_name}"
      # "\\s" (not "\s"): inside a double-quoted string "\s" is a literal
      # space, which would miss class names separated by tabs or newlines.
      class_name = Regexp.new("(^|\\s)#{Regexp.escape(class_name.to_s)}($|\\s)") unless class_name.is_a?(Regexp)
      attributes << ["class", class_name]
      "" # Remove
    end

    # Attribute value. The value may be single quoted, double quoted
    # (either may then contain "]") or bare.
    next if statement.sub!(/^\[\s*([[:alpha:]][\w\-]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^"]*"|[^\]]*)\s*\]/) do |match|
      name, equality, value = $1, $2, $3
      if value == "?"
        value = values.shift
      else
        # Handle single and double quotes.
        value.strip!
        if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
          value = value[1..-2]
        end
      end
      @source << "[#{name}#{equality}'#{value}']"
      attributes << [name.downcase.strip, attribute_match(equality, value)]
      "" # Remove
    end

    # Root element only.
    next if statement.sub!(/^:root/) do |match|
      pseudo << lambda do |element|
        element.parent.nil? || !element.parent.tag?
      end
      @source << ":root"
      "" # Remove
    end

    # Nth-child including last and of-type.
    next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
      reverse = $1 == "last-"
      of_type = $2 == "of-type"
      @source << ":nth-#{$1}#{$2}("
      # Each regexp "when" below re-matches the argument, so $1/$2 inside
      # a branch refer to that branch's own groups.
      case $3
      when "odd"
        pseudo << nth_child(2, 1, of_type, reverse)
        @source << "odd)"
      when "even"
        pseudo << nth_child(2, 2, of_type, reverse)
        @source << "even)"
      when /^(\d+|\?)$/ # b only
        b = ($1 == "?" ? values.shift : $1).to_i
        pseudo << nth_child(0, b, of_type, reverse)
        @source << "#{b})"
      when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
        a = ($1 == "?" ? values.shift :
             $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
        b = ($2 == "?" ? values.shift : $2).to_i
        pseudo << nth_child(a, b, of_type, reverse)
        @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
      else
        raise ArgumentError, "Invalid nth-child #{match}"
      end
      "" # Remove
    end
    # First/last child (of type).
    next if statement.sub!(/^:(first|last)-(child|of-type)/) do |match|
      reverse = $1 == "last"
      of_type = $2 == "of-type"
      pseudo << nth_child(0, 1, of_type, reverse)
      @source << ":#{$1}-#{$2}"
      "" # Remove
    end
    # Only child (of type).
    next if statement.sub!(/^:only-(child|of-type)/) do |match|
      of_type = $1 == "of-type"
      pseudo << only_child(of_type)
      @source << ":only-#{$1}"
      "" # Remove
    end

    # Empty: no child elements or meaningful content (whitespaces
    # are ignored).
    next if statement.sub!(/^:empty/) do |match|
      pseudo << lambda do |element|
        empty = true
        for child in element.children
          if child.tag? || !child.content.strip.empty?
            empty = false
            break
          end
        end
        empty
      end
      @source << ":empty"
      "" # Remove
    end
    # Content: match the text content of the element, stripping
    # leading and trailing spaces.
    next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do |match|
      content = $1
      if content == "?"
        content = values.shift
      elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
        content = content[1..-2]
      end
      @source << ":content('#{content}')"
      content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
      pseudo << lambda do |element|
        # Only the element's direct text nodes are concatenated.
        text = ""
        for child in element.children
          unless child.tag?
            text << child.content
          end
        end
        text.strip =~ content
      end
      "" # Remove
    end

    # Negation. Create another simple selector to handle it; the nested
    # call is made with can_negate false so negation cannot be nested.
    if statement.sub!(/^:not\(\s*/, "")
      raise ArgumentError, "Double negatives are not missing feature" unless can_negate
      @source << ":not("
      negation << simple_selector(statement, values, false)
      raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
      @source << ")"
      next
    end

    # No match: moving on.
    break
  end

  # Return hash. The keys are mapped to instance variables.
  {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
end
683
+
684
+
685
+ # Create a regular expression to match an attribute value based
686
+ # on the equality operator (=, ^=, |=, etc).
687
def attribute_match(equality, value)
  # A Regexp value is embedded as given; any other value is converted to
  # a string and matched literally.
  regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
  # Note "\\s" below: inside a double-quoted string "\s" is a literal
  # space, which would not treat tabs or newlines as word separators.
  case equality
  when "="
    # Match the attribute value in full
    Regexp.new("^#{regexp}$")
  when "~="
    # Match a whitespace-separated word within the attribute value
    Regexp.new("(^|\\s)#{regexp}($|\\s)")
  when "^="
    # Match the beginning of the attribute value
    Regexp.new("^#{regexp}")
  when "$="
    # Match the end of the attribute value
    Regexp.new("#{regexp}$")
  when "*="
    # Match substring of the attribute value
    regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
  when "|="
    # Match the first whitespace-separated item of the attribute value
    Regexp.new("^#{regexp}($|\\s)")
  else
    # No operator: only an empty value is valid, meaning "has attribute".
    raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
    # Match all attributes values (existence check)
    //
  end
end
714
+
715
+
716
+ # Returns a lambda that can match an element against the nth-child
717
+ # pseudo class, given the following arguments:
718
+ # * +a+ -- Value of a part.
719
+ # * +b+ -- Value of b part.
720
+ # * +of_type+ -- True to test only elements of this type (of-type).
721
+ # * +reverse+ -- True to count in reverse order (last-).
722
def nth_child(a, b, of_type, reverse)
  never = lambda { |element| false }
  # a = 0 means select at index b, if b = 0 nothing selected
  return never if a == 0 && b == 0
  # a < 0 and b < 0 will never match against an index
  return never if a < 0 && b < 0
  b = a + b + 1 if b < 0 # b < 0 just picks last element from each group
  b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based
  lambda do |element|
    parent = element.parent
    # Element must be inside parent element.
    return false unless parent && parent.tag?
    # Count from the last sibling when reversed.
    siblings = reverse ? parent.children.reverse : parent.children
    # Only siblings of the same name count when of-type.
    name = of_type ? element.name : nil
    index = 0
    siblings.each do |child|
      # Skip text nodes/comments and, for of-type, other element names.
      next unless child.tag? && (name == nil || child.name == name)
      if a == 0
        # Fixed position: decided by whoever sits at index b.
        return child.equal?(element) if index == b
      elsif a < 0
        # Only the first b counted elements are candidates.
        return false if index > b
        return (index % a) == 0 if child.equal?(element)
      elsif child.equal?(element)
        return (index % a) == b
      end
      index += 1
    end
    false
  end
end
768
+
769
+
770
+ # Creates an only-child lambda. Pass +of_type+ to only look at
771
+ # elements of its type.
772
def only_child(of_type)
  lambda do |element|
    parent = element.parent
    # Element must be inside parent element.
    return false unless parent && parent.tag?
    # Only siblings of the same name count when of-type.
    name = of_type ? element.name : nil
    # The element is the only child when no other counted tag node exists
    # (text nodes/comments are skipped).
    !parent.children.any? do |child|
      child.tag? && (name == nil || child.name == name) && !child.equal?(element)
    end
  end
end
790
+
791
+
792
+ # Called to create a dependent selector (sibling, descendant, etc).
793
+ # Passes the remainder of the statement that will be reduced to zero
794
+ # eventually, and array of substitution values.
795
+ #
796
+ # This method is called from four places, so it helps to put it here
797
+ # for reuse. The only logic deals with the need to detect comma
798
+ # separators (alternate) and apply them to the selector group of the
799
+ # top selector.
800
def next_selector(statement, values)
  dependent = Selector.new(statement, values)
  # Alternates (comma separated) parsed by the dependent selector belong
  # to the selector group of the top selector: move them up and leave the
  # dependent selector with none.
  hoisted = dependent.instance_variable_get(:@alternates)
  if hoisted
    dependent.instance_variable_set(:@alternates, [])
    @alternates.concat(hoisted)
  end
  dependent
end
809
+
810
+ end
811
+
812
+
813
+ # See HTML::Selector.new
814
def self.selector(statement, *values)
  # Convenience shortcut: forwards the expression and the substitution
  # values unchanged to Selector.new.
  Selector.new(statement, *values)
end
817
+
818
+
819
class Tag

  # Builds a selector from the CSS expression and its substitution values
  # (handed over as one array) and returns what it selects starting from
  # this tag.
  def select(selector, *values)
    HTML::Selector.new(selector, values).select(self)
  end

end
827
+
828
+ end