gammo 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +32 -0
  3. data/Gemfile.lock +6 -6
  4. data/README.md +334 -10
  5. data/Rakefile +5 -1
  6. data/lib/gammo/attributes.rb +5 -0
  7. data/lib/gammo/css_selector/ast/combinator.rb +92 -0
  8. data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
  9. data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
  10. data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
  11. data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
  12. data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
  13. data/lib/gammo/css_selector/ast/selector.rb +100 -0
  14. data/lib/gammo/css_selector/context.rb +17 -0
  15. data/lib/gammo/css_selector/errors.rb +6 -0
  16. data/lib/gammo/css_selector/node_set.rb +44 -0
  17. data/lib/gammo/css_selector/parser.rb +790 -0
  18. data/lib/gammo/css_selector/parser.y +321 -0
  19. data/lib/gammo/css_selector.rb +33 -0
  20. data/lib/gammo/modules/subclassify.rb +31 -0
  21. data/lib/gammo/node.rb +2 -0
  22. data/lib/gammo/parser/foreign.rb +3 -3
  23. data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
  24. data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
  25. data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
  26. data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
  27. data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
  28. data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
  29. data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
  30. data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
  31. data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
  32. data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
  33. data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
  34. data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
  35. data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
  36. data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
  37. data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
  38. data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
  39. data/lib/gammo/parser/insertion_mode/text.rb +1 -1
  40. data/lib/gammo/parser/insertion_mode.rb +1 -1
  41. data/lib/gammo/tokenizer/tokens.rb +10 -1
  42. data/lib/gammo/tokenizer.rb +10 -10
  43. data/lib/gammo/version.rb +1 -1
  44. data/lib/gammo/xpath/ast/axis.rb +1 -1
  45. data/lib/gammo/xpath/ast/expression.rb +2 -0
  46. data/lib/gammo/xpath/ast/function.rb +1 -1
  47. data/lib/gammo/xpath/ast/node_test.rb +1 -1
  48. data/lib/gammo/xpath/ast/path.rb +1 -0
  49. data/lib/gammo/xpath.rb +4 -5
  50. metadata +17 -4
  51. data/.travis.yml +0 -6
  52. data/lib/gammo/xpath/ast/subclassify.rb +0 -35
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f88ade2267388af8d29137f6ff0db934d324d77f5bface53cd6ae9fea3f1e466
4
- data.tar.gz: 92642a6208ede13520e29c20529f29fd21bb040eac39ec4af97935990b94b0eb
3
+ metadata.gz: f8fc3eae3f0b1cbe125012fef023b8796430f699f1bcac5a8336770070346315
4
+ data.tar.gz: ec33fbc6c045d1b458544ecbec141939a301fdce1ce9dd46542ba61dae5e5d6a
5
5
  SHA512:
6
- metadata.gz: f15b94b27a1738234662a2f196c440b97e4cd2b83a9c9bb468d7c4cfd4058ad6482b0ac06559dc4cd50a0dc8d41ba6114db1a09d51177bca511d1ce4196ef9e6
7
- data.tar.gz: 5377c2d17574abcc5e0069e7506ad5e47da211c970a79718218c88d9099723c04a7edba0b58da61eb9898e24fc300c04b3f8e2daba49ce41994ec990c27ff5fe
6
+ metadata.gz: 3a6f7dda7321a8cebba91755efe435a5207e95be939b87c8abccdf5b177730ac402975bf52572026d0776b9cc3ab8852e873099b9013e41c1222f9ba496a9085
7
+ data.tar.gz: 8cbe7ea25c29514dca39ded2407bee031bec9ecf2a02a06c82066ca34248b1ecb885e4d392c50801d1a874b13f282c8a5fc715220acbbde582e421c1bcce2814
@@ -0,0 +1,32 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Testing
9
+
10
+ on:
11
+ push:
12
+ branches:
13
+ - master
14
+ pull_request:
15
+
16
+ jobs:
17
+ test:
18
+ name: Test with Ruby-${{ matrix.ruby }}
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby: [2.4, 2.5, 2.6, 2.7, 3.0]
23
+ steps:
24
+ - uses: actions/checkout@v2
25
+ - uses: ruby/setup-ruby@v1
26
+ with:
27
+ ruby-version: ${{ matrix.ruby }}
28
+ bundler-cache: true
29
+ - name: Install dependencies
30
+ run: bundle install
31
+ - name: Run tests
32
+ run: bundle exec rake
data/Gemfile.lock CHANGED
@@ -1,23 +1,23 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gammo (0.1.0)
4
+ gammo (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  docile (1.3.2)
10
10
  erubi (1.9.0)
11
- power_assert (1.1.5)
11
+ power_assert (1.2.0)
12
12
  racc (1.5.0)
13
13
  rake (12.3.3)
14
14
  simplecov (0.18.5)
15
15
  docile (~> 1.1)
16
16
  simplecov-html (~> 0.11)
17
- simplecov-html (0.12.2)
18
- test-unit (3.3.5)
17
+ simplecov-html (0.12.3)
18
+ test-unit (3.3.6)
19
19
  power_assert
20
- yard (0.9.20)
20
+ yard (0.9.25)
21
21
 
22
22
  PLATFORMS
23
23
  ruby
@@ -32,4 +32,4 @@ DEPENDENCIES
32
32
  yard
33
33
 
34
34
  BUNDLED WITH
35
- 2.1.4
35
+ 2.2.3
data/README.md CHANGED
@@ -7,7 +7,8 @@
7
7
  [![GitHub license](https://img.shields.io/github/license/namusyaka/gammo?color=brightgreen)](https://github.com/namusyaka/gammo/blob/master/LICENSE.txt)
8
8
  [![Documentation](http://img.shields.io/:yard-docs-38c800.svg)](http://www.rubydoc.info/gems/gammo/frames)
9
9
 
10
- Gammo is an implementation of the HTML5 parsing algorithm which conforms [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html), without any dependencies. Given an HTML string, Gammo parses it and builds DOM tree based on the tokenization and tree-construction algorithm defined in WHATWG parsing algorithm.
10
+ Gammo provides a pure Ruby HTML5-compliant parser and CSS selector / XPath support for traversing the DOM tree built by Gammo.
11
+ The implementation of the HTML5 parsing algorithm in Gammo conforms to [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html). Given an HTML string, Gammo parses it and builds a DOM tree based on the tokenization and tree-construction algorithms defined in the WHATWG parsing algorithm. These implementations are provided without any external dependencies.
11
12
 
12
13
  Gammo, its naming is inspired by [Gumbo](https://github.com/google/gumbo-parser). But Gammo is a fried tofu fritter made with vegetables.
13
14
 
@@ -15,10 +16,45 @@ Gammo, its naming is inspired by [Gumbo](https://github.com/google/gumbo-parser)
15
16
  require 'gammo'
16
17
  require 'open-uri'
17
18
 
18
- parser = open('https://google.com') { |f| Gammo.new(f.read) }
19
- parser.parse #=> #<Gammo::Node::Document>
19
+ parser = URI.open('https://google.com') { |f| Gammo.new(f.read) }
20
+ document = parser.parse #=> #<Gammo::Node::Document>
21
+
22
+ puts document.css('title').first.inner_text #=> 'Google'
20
23
  ```
21
24
 
25
+ * [Overview](#overview)
26
+ * [Features](#features)
27
+ * [Tokenizaton](#tokenizaton)
28
+ * [Token types](#token-types)
29
+ * [Parsing](#parsing)
30
+ * [Notes](#notes)
31
+ * [Node](#node)
32
+ * [DOM Tree Traversal](#dom-tree-traversal)
33
+ * [XPath 1.0 (experimental)](#xpath-10-experimental)
34
+ * [Example](#example)
35
+ * [Axis Specifiers](#axis-specifiers)
36
+ * [Node Test](#node-test)
37
+ * [Operators](#operators)
38
+ * [Functions](#functions)
39
+ * [Node set functions](#node-set-functions)
40
+ * [String Functions](#string-functions)
41
+ * [Boolean Functions](#boolean-functions)
42
+ * [Number Functions](#number-functions)
43
+ * [CSS Selector (experimental)](#css-selector-experimental)
44
+ * [Example](#example)
45
+ * [Groups of selectors](#groups-of-selectors)
46
+ * [Simple selectors](#simple-selectors)
47
+ * [Type selector &amp; Universal selector](#type-selector--universal-selector)
48
+ * [Attribute selectors](#attribute-selectors)
49
+ * [Class selectors](#class-selectors)
50
+ * [ID selectors](#id-selectors)
51
+ * [Pseudo-classes](#pseudo-classes)
52
+ * [Combinators](#combinators)
53
+ * [Performance](#performance)
54
+ * [References](#references)
55
+ * [License](#license)
56
+ * [Release History](#release-history)
57
+
22
58
  ## Overview
23
59
 
24
60
  ### Features
@@ -26,7 +62,7 @@ parser.parse #=> #<Gammo::Node::Document>
26
62
  - [Tokenization](#tokenization): Gammo has a tokenizer for implementing [the tokenization algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tokenization).
27
63
  - [Parsing](#parsing): Gammo provides a parser which implements the parsing algorithm by the above tokenization and [the tree-construction algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction).
28
64
  - [Node](#node): Gammo provides the nodes which implement [WHATWG DOM specification](https://dom.spec.whatwg.org/) partially.
29
- - [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal.
65
+ - [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal (CSS selector / XPath).
30
66
  - [Performance](#performance): Gammo does not prioritize performance, and there are a few potential performance notes.
31
67
 
32
68
  ## Tokenizaton
@@ -165,8 +201,7 @@ For some nodes such as `Gammo::Node::Element` and `Gammo::Node::Document`, they
165
201
 
166
202
  ## DOM Tree Traversal
167
203
 
168
- Currently, XPath 1.0 is the only way for traversing DOM tree built by Gammo.
169
- CSS selector support is also planned but not having any ETA.
204
+ CSS selectors and XPath 1.0 are the two ways to traverse the DOM tree built by Gammo.
170
205
 
171
206
  ### XPath 1.0 (experimental)
172
207
 
@@ -346,12 +381,12 @@ Node tests consist of specific node names or more general expressions. Although
346
381
  <tr>
347
382
  <td><code>text()</code></td>
348
383
  <td>yes</td>
349
- <td>Finds a node of type text, e.g. <code>hello</code> in <code><p>hello <a href="https://hello">world</a></p></code></td>
384
+ <td>Finds a node of type text, e.g. <code>hello</code> in <code>&lt;p&gt;hello &lt;a href="https://hello"&gt;world&lt;/a&gt;&lt;/p&gt;</code></td>
350
385
  </tr>
351
386
  <tr>
352
387
  <td><code>comment()</code></td>
353
388
  <td>yes</td>
354
- <td>Finds a node of type comment, e.g. <code><!-- comment --></code></td>
389
+ <td>Finds a node of type comment, e.g. <code>&lt;!-- comment --&gt;</code></td>
355
390
  </tr>
356
391
  <tr>
357
392
  <td><code>node()</code></td>
@@ -546,9 +581,296 @@ XPath 1.0 defines four data types (nodeset, string, number, boolean) and there a
546
581
  </tbody>
547
582
  </table>
548
583
 
549
- ### CSS Selector
584
+ ### CSS Selector (experimental)
585
+
586
+ Gammo has its own lexer/parser for CSS selectors, provided as a helper on the DOM tree built by Gammo.
587
+ Here is a simple example:
588
+
589
+ ```ruby
590
+ document = Gammo.new('<!doctype html><input type="button">').parse
591
+ node_set = document.css('input[type="button"]') #=> "<Gammo::CSSSelector::NodeSet>"
592
+
593
+ node_set.length #=> 1
594
+ node_set.first #=> "<Gammo::Node::Element>"
595
+ ```
596
+
597
+ Since this is implemented from scratch, Gammo provides this support as a very experimental feature. Please file an issue if you find bugs.
598
+
599
+ #### Example
600
+
601
+ Before proceeding to the details of CSS Selector support, let's have a look at a few simple examples. Given a sample HTML text and its DOM tree:
602
+
603
+ ```ruby
604
+ document = Gammo.new(<<-EOS).parse
605
+ <!DOCTYPE html>
606
+ <html>
607
+ <head>
608
+ </head>
609
+ <body>
610
+ <h1>namusyaka.com</h1>
611
+ <p class="description">Here is a sample web site.</p>
612
+ <ul>
613
+ <li>hello</li>
614
+ <li>world</li>
615
+ </ul>
616
+ <ul id="links">
617
+ <li>Google <a href="https://google.com/">google.com</a></li>
618
+ <li>GitHub <a href="https://github.com/namusyaka">github.com/namusyaka</a></li>
619
+ </ul>
620
+ </body>
621
+ </html>
622
+ EOS
623
+ ```
624
+
625
+ The following CSS selector gets all `li` elements and prints their text contents:
626
+
627
+ ```ruby
628
+ document.css('li').each do |elm|
629
+ puts elm.inner_text
630
+ end
631
+ ```
632
+
633
+ The following CSS selector gets all `li` elements under the `ul` element having the `id=links` attribute:
634
+
635
+ ```ruby
636
+ document.css('ul#links li').each do |elm|
637
+ puts elm.inner_text
638
+ end
639
+ ```
640
+
641
+ #### Groups of selectors
642
+
643
+ Gammo supports [groups of selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#grouping); this means you can use `,` to traverse the DOM tree with multiple selectors.
550
644
 
551
- TBD.
645
+ ```ruby
646
+ require 'gammo'
647
+
648
+ @doc = Gammo.new(<<-EOS).parse
649
+ <!DOCTYPE html>
650
+ <html>
651
+ <head>
652
+ <title>hello</title>
653
+ <meta charset="utf8">
654
+ </head>
655
+ <body>
656
+ <p id="hello">hello</p>
657
+ <p id="world">world</p>
658
+ EOS
659
+
660
+ @doc.css('#hello, #world').map(&:inner_text).join(' ') #=> 'hello world'
661
+ ```
662
+
663
+ #### Simple selectors
664
+
665
+ ##### Type selector & Universal selector
666
+
667
+ Gammo supports the basic grammar of type selector and universal selector, but not namespaces.
668
+
669
+ ##### Attribute selectors
670
+
671
+ See more details: [6.3. Attribute selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#attribute-selectors)
672
+
673
+ <table>
674
+ <thead>
675
+ <tr>
676
+ <th>Syntax</th>
677
+ <th>Supported</th>
678
+ </tr>
679
+ </thead>
680
+ <tbody>
681
+ <tr>
682
+ <td><code>[att]</code></td>
683
+ <td>yes</td>
684
+ </tr>
685
+ <tr>
686
+ <td><code>[att=val]</code></td>
687
+ <td>yes</td>
688
+ </tr>
689
+ <tr>
690
+ <td><code>[att~=val]</code></td>
691
+ <td>yes</td>
692
+ </tr>
693
+ <tr>
694
+ <td><code>[att|=val]</code></td>
695
+ <td>yes</td>
696
+ </tr>
697
+ </tbody>
698
+ </table>
699
+
700
+ ##### Class selectors
701
+
702
+ Supported. See more details: [6.4. Class selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#class-html)
703
+
704
+ ##### ID selectors
705
+
706
+ Supported. See more details: [6.5. ID selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#id-selectors)
707
+
708
+ ##### Pseudo-classes
709
+
710
+ Partially supported. See the table below.
711
+
712
+ <table>
713
+ <thead>
714
+ <tr>
715
+ <th>Class name</th>
716
+ <th>Supported</th>
717
+ <th>Can support?</th>
718
+ </tr>
719
+ </thead>
720
+ <tbody>
721
+ <tr>
722
+ <td><code>:link</code></td>
723
+ <td>no</td>
724
+ <td>no</td>
725
+ </tr>
726
+ <tr>
727
+ <td><code>:visited</code></td>
728
+ <td>no</td>
729
+ <td>no</td>
730
+ </tr>
731
+ <tr>
732
+ <td><code>:hover</code></td>
733
+ <td>no</td>
734
+ <td>no</td>
735
+ </tr>
736
+ <tr>
737
+ <td><code>:active</code></td>
738
+ <td>no</td>
739
+ <td>no</td>
740
+ </tr>
741
+ <tr>
742
+ <td><code>:focus</code></td>
743
+ <td>no</td>
744
+ <td>no</td>
745
+ </tr>
746
+ <tr>
747
+ <td><code>:target</code></td>
748
+ <td>no</td>
749
+ <td>no</td>
750
+ </tr>
751
+ <tr>
752
+ <td><code>:lang</code></td>
753
+ <td>no</td>
754
+ <td>yes</td>
755
+ </tr>
756
+ <tr>
757
+ <td><code>:enabled</code></td>
758
+ <td>yes</td>
759
+ <td>yes</td>
760
+ </tr>
761
+ <tr>
762
+ <td><code>:disabled</code></td>
763
+ <td>yes</td>
764
+ <td>yes</td>
765
+ </tr>
766
+ <tr>
767
+ <td><code>:checked</code></td>
768
+ <td>yes</td>
769
+ <td>yes</td>
770
+ </tr>
771
+ <tr>
772
+ <td><code>:root</code></td>
773
+ <td>yes</td>
774
+ <td>yes</td>
775
+ </tr>
776
+ <tr>
777
+ <td><code>:nth-child</code></td>
778
+ <td>yes</td>
779
+ <td>yes</td>
780
+ </tr>
781
+ <tr>
782
+ <td><code>:nth-last-child</code></td>
783
+ <td>no</td>
784
+ <td>yes</td>
785
+ </tr>
786
+ <tr>
787
+ <td><code>:nth-of-type</code></td>
788
+ <td>no</td>
789
+ <td>yes</td>
790
+ </tr>
791
+ <tr>
792
+ <td><code>:nth-last-of-type</code></td>
793
+ <td>no</td>
794
+ <td>yes</td>
795
+ </tr>
796
+ <tr>
797
+ <td><code>:first-child</code></td>
798
+ <td>no</td>
799
+ <td>yes</td>
800
+ </tr>
801
+ <tr>
802
+ <td><code>:last-child</code></td>
803
+ <td>no</td>
804
+ <td>yes</td>
805
+ </tr>
806
+ <tr>
807
+ <td><code>:first-of-type</code></td>
808
+ <td>no</td>
809
+ <td>yes</td>
810
+ </tr>
811
+ <tr>
812
+ <td><code>:last-of-type</code></td>
813
+ <td>no</td>
814
+ <td>yes</td>
815
+ </tr>
816
+ <tr>
817
+ <td><code>:only-child</code></td>
818
+ <td>no</td>
819
+ <td>yes</td>
820
+ </tr>
821
+ <tr>
822
+ <td><code>:only-of-type</code></td>
823
+ <td>no</td>
824
+ <td>yes</td>
825
+ </tr>
826
+ <tr>
827
+ <td><code>:empty</code></td>
828
+ <td>no</td>
829
+ <td>yes</td>
830
+ </tr>
831
+ <tr>
832
+ <td><code>:not</code></td>
833
+ <td>yes</td>
834
+ <td>yes</td>
835
+ </tr>
836
+ </tbody>
837
+ </table>
838
+
839
+ #### Combinators
840
+
841
+ See more details: [8. Combinators](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#combinators)
842
+
843
+ <table>
844
+ <thead>
845
+ <tr>
846
+ <th>Syntax</th>
847
+ <th>Supported</th>
848
+ <th>Desc</th>
849
+ </tr>
850
+ </thead>
851
+ <tbody>
852
+ <tr>
853
+ <td><code>h1 em</code></td>
854
+ <td>yes</td>
855
+ <td>Descendant combinator</td>
856
+ </tr>
857
+ <tr>
858
+ <td><code>h1 > em</code></td>
859
+ <td>yes</td>
860
+ <td>Child combinator</td>
861
+ </tr>
862
+ <tr>
863
+ <td><code>math + p</code></td>
864
+ <td>yes</td>
865
+ <td>Next-sibling combinator</td>
866
+ </tr>
867
+ <tr>
868
+ <td><code>h1 ~ pre</code></td>
869
+ <td>yes</td>
870
+ <td>Subsequent-sibling combinator</td>
871
+ </tr>
872
+ </tbody>
873
+ </table>
552
874
 
553
875
  ## Performance
554
876
 
@@ -571,6 +893,8 @@ The gem is available as open source under the terms of the [MIT License](https:/
571
893
 
572
894
  ## Release History
573
895
 
896
+ - v0.3.0
897
+ - CSS selector support [#11](https://github.com/namusyaka/gammo/pull/11)
574
898
  - v0.2.0
575
899
  - XPath 1.0 support [#4](https://github.com/namusyaka/gammo/pull/4)
576
900
  - v0.1.0
data/Rakefile CHANGED
@@ -25,7 +25,11 @@ task :generate do
25
25
  end
26
26
 
27
27
  namespace :racc do
28
- task :parser do
28
+ task :xpath do
29
29
  `bundle exec racc lib/gammo/xpath/parser.y -o lib/gammo/xpath/parser.rb`
30
30
  end
31
+
32
+ task :css do
33
+ `bundle exec racc lib/gammo/css_selector/parser.y -o lib/gammo/css_selector/parser.rb`
34
+ end
31
35
  end
@@ -86,6 +86,11 @@ module Gammo
86
86
  @attributes_hash.to_s
87
87
  end
88
88
 
89
+ def has_key?(key)
90
+ @attributes_hash.key?(key.to_s)
91
+ end
92
+ alias_method :key?, :has_key?
93
+
89
94
  private
90
95
 
91
96
  def attributes_to_hash(attrs)
@@ -0,0 +1,92 @@
1
+ require 'set'
2
+ require 'gammo/css_selector/node_set'
3
+ require 'gammo/modules/subclassify'
4
+
5
+ module Gammo
6
+ module CSSSelector
7
+ module AST
8
+ # Class for representing combinator defined in the CSS selector specification.
9
+ # @!visibility private
10
+ class Combinator
11
+ extend Subclassify
12
+
13
+ def initialize(selector)
14
+ @selector = selector
15
+ end
16
+
17
+ def evaluate(context)
18
+ strain context, NodeSet.new
19
+ end
20
+
21
+ # Class for representing the descendant combinator.
22
+ # @!visibility private
23
+ class Descendant < Combinator
24
+ declare :descendant
25
+
26
+ def strain(context, node_set)
27
+ @selector.search_descendant(context.dup, node_set)
28
+ node_set
29
+ end
30
+ end
31
+
32
+ # Class for representing the child combinator.
33
+ # @!visibility private
34
+ class Child < Combinator
35
+ declare :child
36
+
37
+ def strain(context, node_set)
38
+ context.node.children.inject(0) do |i, child|
39
+ next i unless child.kind_of?(Node::Element)
40
+ i += 1
41
+ node_set << child if @selector.match?(Context.new(node: child, position: i))
42
+ i
43
+ end
44
+ node_set
45
+ end
46
+ end
47
+
48
+ # Class for representing the next-sibling combinator.
49
+ # @!visibility private
50
+ class NextSibling < Combinator
51
+ declare :next_sibling
52
+
53
+ def strain(context, node_set)
54
+ node = context.node
55
+ context_position = context.position
56
+ context_node = context.node
57
+ while node = node.next_sibling
58
+ context.position += 1
59
+ context.node = node
60
+ next unless node.is_a?(Node::Element)
61
+ node_set << node if @selector.match?(context)
62
+ break
63
+ end
64
+ context.position = context_position
65
+ context.node = context_node
66
+ node_set
67
+ end
68
+ end
69
+
70
+ # Class for representing the subsequent-sibling combinator.
71
+ # @!visibility private
72
+ class SubsequentSibling < Combinator
73
+ declare :subsequent_sibling
74
+
75
+ def strain(context, node_set)
76
+ node = context.node
77
+ context_node = context.node
78
+ position = context.position
79
+ while node = node.next_sibling
80
+ context.position += 1
81
+ context.node = node
82
+ node_set << node if @selector.match?(context)
83
+ end
84
+ context.position = position
85
+ context.node = context_node
86
+ node_set
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,86 @@
1
+ module Gammo
2
+ module CSSSelector
3
+ module AST
4
+ module Selector
5
+ class Attrib
6
+ attr_accessor :value
7
+
8
+ extend Subclassify
9
+
10
+ def initialize(key:, value:, namespace_prefix: nil)
11
+ @key = key
12
+ @value = value
13
+ @namespace_prefix = namespace_prefix
14
+ end
15
+
16
+ def match?(context)
17
+ raise NotImplementedError, "#match? must be implemented by sub class"
18
+ end
19
+
20
+ private
21
+
22
+ def attrib_value(node)
23
+ node.attributes[@key.to_sym]
24
+ end
25
+
26
+ class Equal < Attrib
27
+ declare :equal
28
+
29
+ def match?(context)
30
+ attrib_value(context.node) == @value
31
+ end
32
+ end
33
+
34
+ class PrefixMatch < Attrib
35
+ declare :prefix_match
36
+
37
+ def match?(context)
38
+ return false if !@value || @value.empty?
39
+ return false unless val = attrib_value(context.node)
40
+ val.start_with?(@value)
41
+ end
42
+ end
43
+
44
+ class SuffixMatch < Attrib
45
+ declare :suffix_match
46
+
47
+ def match?(context)
48
+ return false if !@value || @value.empty?
49
+ return false unless val = attrib_value(context.node)
50
+ val.end_with?(@value)
51
+ end
52
+ end
53
+
54
+ class SubstringMatch < Attrib
55
+ declare :substring_match
56
+
57
+ def match?(context)
58
+ return false if !@value || @value.empty?
59
+ return false unless val = attrib_value(context.node)
60
+ val.include?(@value)
61
+ end
62
+ end
63
+
64
+ class DashMatch < Attrib
65
+ declare :dash_match
66
+
67
+ def match?(context)
68
+ val = attrib_value(context.node) || ''
69
+ val == @value || (val.start_with?(@value) && val[@value.length] == ?-)
70
+ end
71
+ end
72
+
73
+ class Includes < Attrib
74
+ declare :includes
75
+
76
+ def match?(context)
77
+ return false if !@value || @value.empty?
78
+ val = attrib_value(context.node) || ''
79
+ val == @value || (val.split(/\s/).include?(@value))
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end