gammo 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +32 -0
- data/Gemfile.lock +6 -6
- data/README.md +334 -10
- data/Rakefile +5 -1
- data/lib/gammo/attributes.rb +5 -0
- data/lib/gammo/css_selector/ast/combinator.rb +92 -0
- data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
- data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
- data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
- data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
- data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
- data/lib/gammo/css_selector/ast/selector.rb +100 -0
- data/lib/gammo/css_selector/context.rb +17 -0
- data/lib/gammo/css_selector/errors.rb +6 -0
- data/lib/gammo/css_selector/node_set.rb +44 -0
- data/lib/gammo/css_selector/parser.rb +790 -0
- data/lib/gammo/css_selector/parser.y +321 -0
- data/lib/gammo/css_selector.rb +33 -0
- data/lib/gammo/modules/subclassify.rb +31 -0
- data/lib/gammo/node.rb +2 -0
- data/lib/gammo/parser/foreign.rb +3 -3
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
- data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
- data/lib/gammo/parser/insertion_mode/text.rb +1 -1
- data/lib/gammo/parser/insertion_mode.rb +1 -1
- data/lib/gammo/tokenizer/tokens.rb +10 -1
- data/lib/gammo/tokenizer.rb +10 -10
- data/lib/gammo/version.rb +1 -1
- data/lib/gammo/xpath/ast/axis.rb +1 -1
- data/lib/gammo/xpath/ast/expression.rb +2 -0
- data/lib/gammo/xpath/ast/function.rb +1 -1
- data/lib/gammo/xpath/ast/node_test.rb +1 -1
- data/lib/gammo/xpath/ast/path.rb +1 -0
- data/lib/gammo/xpath.rb +4 -5
- metadata +17 -4
- data/.travis.yml +0 -6
- data/lib/gammo/xpath/ast/subclassify.rb +0 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8fc3eae3f0b1cbe125012fef023b8796430f699f1bcac5a8336770070346315
|
4
|
+
data.tar.gz: ec33fbc6c045d1b458544ecbec141939a301fdce1ce9dd46542ba61dae5e5d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a6f7dda7321a8cebba91755efe435a5207e95be939b87c8abccdf5b177730ac402975bf52572026d0776b9cc3ab8852e873099b9013e41c1222f9ba496a9085
|
7
|
+
data.tar.gz: 8cbe7ea25c29514dca39ded2407bee031bec9ecf2a02a06c82066ca34248b1ecb885e4d392c50801d1a874b13f282c8a5fc715220acbbde582e421c1bcce2814
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Testing
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches:
|
13
|
+
- master
|
14
|
+
pull_request:
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
name: Test with Ruby-${{ matrix.ruby }}
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby: ['2.4', '2.5', '2.6', '2.7', '3.0']
|
23
|
+
steps:
|
24
|
+
- uses: actions/checkout@v2
|
25
|
+
- uses: ruby/setup-ruby@v1
|
26
|
+
with:
|
27
|
+
ruby-version: ${{ matrix.ruby }}
|
28
|
+
bundler-cache: true
|
29
|
+
- name: Install dependencies
|
30
|
+
run: bundle install
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake
|
data/Gemfile.lock
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
gammo (0.
|
4
|
+
gammo (0.2.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
docile (1.3.2)
|
10
10
|
erubi (1.9.0)
|
11
|
-
power_assert (1.
|
11
|
+
power_assert (1.2.0)
|
12
12
|
racc (1.5.0)
|
13
13
|
rake (12.3.3)
|
14
14
|
simplecov (0.18.5)
|
15
15
|
docile (~> 1.1)
|
16
16
|
simplecov-html (~> 0.11)
|
17
|
-
simplecov-html (0.12.
|
18
|
-
test-unit (3.3.
|
17
|
+
simplecov-html (0.12.3)
|
18
|
+
test-unit (3.3.6)
|
19
19
|
power_assert
|
20
|
-
yard (0.9.
|
20
|
+
yard (0.9.25)
|
21
21
|
|
22
22
|
PLATFORMS
|
23
23
|
ruby
|
@@ -32,4 +32,4 @@ DEPENDENCIES
|
|
32
32
|
yard
|
33
33
|
|
34
34
|
BUNDLED WITH
|
35
|
-
2.
|
35
|
+
2.2.3
|
data/README.md
CHANGED
@@ -7,7 +7,8 @@
|
|
7
7
|
[![GitHub license](https://img.shields.io/github/license/namusyaka/gammo?color=brightgreen)](https://github.com/namusyaka/gammo/blob/master/LICENSE.txt)
|
8
8
|
[![Documentation](http://img.shields.io/:yard-docs-38c800.svg)](http://www.rubydoc.info/gems/gammo/frames)
|
9
9
|
|
10
|
-
Gammo
|
10
|
+
Gammo provides a pure Ruby HTML5-compliant parser and CSS selector / XPath support for traversing the DOM tree built by Gammo.
|
11
|
+
The implementation of the HTML5 parsing algorithm in Gammo conforms to [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html). Given an HTML string, Gammo parses it and builds a DOM tree based on the tokenization and tree-construction algorithms defined in the WHATWG parsing algorithm. These implementations are provided without any external dependencies.
|
11
12
|
|
12
13
|
Gammo, its naming is inspired by [Gumbo](https://github.com/google/gumbo-parser). But Gammo is a fried tofu fritter made with vegetables.
|
13
14
|
|
@@ -15,10 +16,45 @@ Gammo, its naming is inspired by [Gumbo](https://github.com/google/gumbo-parser)
|
|
15
16
|
require 'gammo'
|
16
17
|
require 'open-uri'
|
17
18
|
|
18
|
-
parser = open('https://google.com') { |f| Gammo.new(f.read) }
|
19
|
-
parser.parse #=> #<Gammo::Node::Document>
|
19
|
+
parser = URI.open('https://google.com') { |f| Gammo.new(f.read) }
|
20
|
+
document = parser.parse #=> #<Gammo::Node::Document>
|
21
|
+
|
22
|
+
puts document.css('title').first.inner_text #=> 'Google'
|
20
23
|
```
|
21
24
|
|
25
|
+
* [Overview](#overview)
|
26
|
+
* [Features](#features)
|
27
|
+
* [Tokenization](#tokenizaton)
|
28
|
+
* [Token types](#token-types)
|
29
|
+
* [Parsing](#parsing)
|
30
|
+
* [Notes](#notes)
|
31
|
+
* [Node](#node)
|
32
|
+
* [DOM Tree Traversal](#dom-tree-traversal)
|
33
|
+
* [XPath 1.0 (experimental)](#xpath-10-experimental)
|
34
|
+
* [Example](#example)
|
35
|
+
* [Axis Specifiers](#axis-specifiers)
|
36
|
+
* [Node Test](#node-test)
|
37
|
+
* [Operators](#operators)
|
38
|
+
* [Functions](#functions)
|
39
|
+
* [Node set functions](#node-set-functions)
|
40
|
+
* [String Functions](#string-functions)
|
41
|
+
* [Boolean Functions](#boolean-functions)
|
42
|
+
* [Number Functions](#number-functions)
|
43
|
+
* [CSS Selector (experimental)](#css-selector-experimental)
|
44
|
+
* [Example](#example)
|
45
|
+
* [Groups of selectors](#groups-of-selectors)
|
46
|
+
* [Simple selectors](#simple-selectors)
|
47
|
+
* [Type selector & Universal selector](#type-selector--universal-selector)
|
48
|
+
* [Attribute selectors](#attribute-selectors)
|
49
|
+
* [Class selectors](#class-selectors)
|
50
|
+
* [ID selectors](#id-selectors)
|
51
|
+
* [Pseudo-classes](#pseudo-classes)
|
52
|
+
* [Combinators](#combinators)
|
53
|
+
* [Performance](#performance)
|
54
|
+
* [References](#references)
|
55
|
+
* [License](#license)
|
56
|
+
* [Release History](#release-history)
|
57
|
+
|
22
58
|
## Overview
|
23
59
|
|
24
60
|
### Features
|
@@ -26,7 +62,7 @@ parser.parse #=> #<Gammo::Node::Document>
|
|
26
62
|
- [Tokenization](#tokenization): Gammo has a tokenizer for implementing [the tokenization algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tokenization).
|
27
63
|
- [Parsing](#parsing): Gammo provides a parser which implements the parsing algorithm by the above tokenization and [the tree-construction algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction).
|
28
64
|
- [Node](#node): Gammo provides the nodes which implement [WHATWG DOM specification](https://dom.spec.whatwg.org/) partially.
|
29
|
-
- [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal.
|
65
|
+
- [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal (CSS selector / XPath).
|
30
66
|
- [Performance](#performance): Gammo does not prioritize performance, and there are a few potential performance notes.
|
31
67
|
|
32
68
|
## Tokenizaton
|
@@ -165,8 +201,7 @@ For some nodes such as `Gammo::Node::Element` and `Gammo::Node::Document`, they
|
|
165
201
|
|
166
202
|
## DOM Tree Traversal
|
167
203
|
|
168
|
-
|
169
|
-
CSS selector support is also planned but not having any ETA.
|
204
|
+
CSS selectors and XPath 1.0 are the ways to traverse the DOM tree built by Gammo.
|
170
205
|
|
171
206
|
### XPath 1.0 (experimental)
|
172
207
|
|
@@ -346,12 +381,12 @@ Node tests consist of specific node names or more general expressions. Although
|
|
346
381
|
<tr>
|
347
382
|
<td><code>text()</code></td>
|
348
383
|
<td>yes</td>
|
349
|
-
<td>Finds a node of type text, e.g. <code>hello</code> in <code
|
384
|
+
<td>Finds a node of type text, e.g. <code>hello</code> in <code><p>hello <a href="https://hello">world</a></p></code></td>
|
350
385
|
</tr>
|
351
386
|
<tr>
|
352
387
|
<td><code>comment()</code></td>
|
353
388
|
<td>yes</td>
|
354
|
-
<td>Finds a node of type comment, e.g. <code
|
389
|
+
<td>Finds a node of type comment, e.g. <code><!-- comment --></code></td>
|
355
390
|
</tr>
|
356
391
|
<tr>
|
357
392
|
<td><code>node()</code></td>
|
@@ -546,9 +581,296 @@ XPath 1.0 defines four data types (nodeset, string, number, boolean) and there a
|
|
546
581
|
</tbody>
|
547
582
|
</table>
|
548
583
|
|
549
|
-
### CSS Selector
|
584
|
+
### CSS Selector (experimental)
|
585
|
+
|
586
|
+
Gammo has its own lexer/parser for CSS selectors; it is provided as a helper on the DOM tree built by Gammo.
|
587
|
+
Here is a simple example:
|
588
|
+
|
589
|
+
```ruby
|
590
|
+
document = Gammo.new('<!doctype html><input type="button">').parse
|
591
|
+
node_set = document.css('input[type="button"]') #=> "<Gammo::CSSSelector::NodeSet>"
|
592
|
+
|
593
|
+
node_set.length #=> 1
|
594
|
+
node_set.first #=> "<Gammo::Node::Element>"
|
595
|
+
```
|
596
|
+
|
597
|
+
Since this is implemented from scratch, Gammo provides this support as a highly experimental feature. Please file an issue if you find bugs.
|
598
|
+
|
599
|
+
#### Example
|
600
|
+
|
601
|
+
Before going into the details of CSS selector support, let's have a look at a few simple examples. Given a sample HTML text and its DOM tree:
|
602
|
+
|
603
|
+
```ruby
|
604
|
+
document = Gammo.new(<<-EOS).parse
|
605
|
+
<!DOCTYPE html>
|
606
|
+
<html>
|
607
|
+
<head>
|
608
|
+
</head>
|
609
|
+
<body>
|
610
|
+
<h1>namusyaka.com</h1>
|
611
|
+
<p class="description">Here is a sample web site.</p>
|
612
|
+
<ul>
|
613
|
+
<li>hello</li>
|
614
|
+
<li>world</li>
|
615
|
+
</ul>
|
616
|
+
<ul id="links">
|
617
|
+
<li>Google <a href="https://google.com/">google.com</a></li>
|
618
|
+
<li>GitHub <a href="https://github.com/namusyaka">github.com/namusyaka</a></li>
|
619
|
+
</ul>
|
620
|
+
</body>
|
621
|
+
</html>
|
622
|
+
EOS
|
623
|
+
```
|
624
|
+
|
625
|
+
The following CSS selector gets all `li` elements and prints their text contents:
|
626
|
+
|
627
|
+
```ruby
|
628
|
+
document.css('li').each do |elm|
|
629
|
+
puts elm.inner_text
|
630
|
+
end
|
631
|
+
```
|
632
|
+
|
633
|
+
The following CSS selector gets all `li` elements under the `ul` element having the `id=links` attribute:
|
634
|
+
|
635
|
+
```ruby
|
636
|
+
document.css('ul#links li').each do |elm|
|
637
|
+
puts elm.inner_text
|
638
|
+
end
|
639
|
+
```
|
640
|
+
|
641
|
+
#### Groups of selectors
|
642
|
+
|
643
|
+
Gammo supports [groups of selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#grouping); this means you can use `,` to traverse the DOM tree with multiple selectors.
|
550
644
|
|
551
|
-
|
645
|
+
```ruby
|
646
|
+
require 'gammo'
|
647
|
+
|
648
|
+
@doc = Gammo.new(<<-EOS).parse
|
649
|
+
<!DOCTYPE html>
|
650
|
+
<html>
|
651
|
+
<head>
|
652
|
+
<title>hello</title>
|
653
|
+
<meta charset="utf8">
|
654
|
+
</head>
|
655
|
+
<body>
|
656
|
+
<p id="hello">hello</p>
|
657
|
+
<p id="world">world</p>
|
658
|
+
EOS
|
659
|
+
|
660
|
+
@doc.css('#hello, #world').map(&:inner_text).join(' ') #=> 'hello world'
|
661
|
+
```
|
662
|
+
|
663
|
+
#### Simple selectors
|
664
|
+
|
665
|
+
##### Type selector & Universal selector
|
666
|
+
|
667
|
+
Gammo supports the basic grammar of type selector and universal selector, but not namespaces.
|
668
|
+
|
669
|
+
##### Attribute selectors
|
670
|
+
|
671
|
+
See more details: [6.3. Attribute selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#attribute-selectors)
|
672
|
+
|
673
|
+
<table>
|
674
|
+
<thead>
|
675
|
+
<tr>
|
676
|
+
<th>Syntax</th>
|
677
|
+
<th>Supported</th>
|
678
|
+
</tr>
|
679
|
+
</thead>
|
680
|
+
<tbody>
|
681
|
+
<tr>
|
682
|
+
<td><code>[att]</code></td>
|
683
|
+
<td>yes</td>
|
684
|
+
</tr>
|
685
|
+
<tr>
|
686
|
+
<td><code>[att=val]</code></td>
|
687
|
+
<td>yes</td>
|
688
|
+
</tr>
|
689
|
+
<tr>
|
690
|
+
<td><code>[att~=val]</code></td>
|
691
|
+
<td>yes</td>
|
692
|
+
</tr>
|
693
|
+
<tr>
|
694
|
+
<td><code>[att|=val]</code></td>
|
695
|
+
<td>yes</td>
|
696
|
+
</tr>
|
697
|
+
</tbody>
|
698
|
+
</table>
|
699
|
+
|
700
|
+
##### Class selectors
|
701
|
+
|
702
|
+
Supported. See more details: [6.4. Class selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#class-html)
|
703
|
+
|
704
|
+
##### ID selectors
|
705
|
+
|
706
|
+
Supported. See more details: [6.5. ID selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#id-selectors)
|
707
|
+
|
708
|
+
##### Pseudo-classes
|
709
|
+
|
710
|
+
Partially supported. See the table below.
|
711
|
+
|
712
|
+
<table>
|
713
|
+
<thead>
|
714
|
+
<tr>
|
715
|
+
<th>Class name</th>
|
716
|
+
<th>Supported</th>
|
717
|
+
<th>Can support?</th>
|
718
|
+
</tr>
|
719
|
+
</thead>
|
720
|
+
<tbody>
|
721
|
+
<tr>
|
722
|
+
<td><code>:link</code></td>
|
723
|
+
<td>no</td>
|
724
|
+
<td>no</td>
|
725
|
+
</tr>
|
726
|
+
<tr>
|
727
|
+
<td><code>:visited</code></td>
|
728
|
+
<td>no</td>
|
729
|
+
<td>no</td>
|
730
|
+
</tr>
|
731
|
+
<tr>
|
732
|
+
<td><code>:hover</code></td>
|
733
|
+
<td>no</td>
|
734
|
+
<td>no</td>
|
735
|
+
</tr>
|
736
|
+
<tr>
|
737
|
+
<td><code>:active</code></td>
|
738
|
+
<td>no</td>
|
739
|
+
<td>no</td>
|
740
|
+
</tr>
|
741
|
+
<tr>
|
742
|
+
<td><code>:focus</code></td>
|
743
|
+
<td>no</td>
|
744
|
+
<td>no</td>
|
745
|
+
</tr>
|
746
|
+
<tr>
|
747
|
+
<td><code>:target</code></td>
|
748
|
+
<td>no</td>
|
749
|
+
<td>no</td>
|
750
|
+
</tr>
|
751
|
+
<tr>
|
752
|
+
<td><code>:lang</code></td>
|
753
|
+
<td>no</td>
|
754
|
+
<td>yes</td>
|
755
|
+
</tr>
|
756
|
+
<tr>
|
757
|
+
<td><code>:enabled</code></td>
|
758
|
+
<td>yes</td>
|
759
|
+
<td>yes</td>
|
760
|
+
</tr>
|
761
|
+
<tr>
|
762
|
+
<td><code>:disabled</code></td>
|
763
|
+
<td>yes</td>
|
764
|
+
<td>yes</td>
|
765
|
+
</tr>
|
766
|
+
<tr>
|
767
|
+
<td><code>:checked</code></td>
|
768
|
+
<td>yes</td>
|
769
|
+
<td>yes</td>
|
770
|
+
</tr>
|
771
|
+
<tr>
|
772
|
+
<td><code>:root</code></td>
|
773
|
+
<td>yes</td>
|
774
|
+
<td>yes</td>
|
775
|
+
</tr>
|
776
|
+
<tr>
|
777
|
+
<td><code>:nth-child</code></td>
|
778
|
+
<td>yes</td>
|
779
|
+
<td>yes</td>
|
780
|
+
</tr>
|
781
|
+
<tr>
|
782
|
+
<td><code>:nth-last-child</code></td>
|
783
|
+
<td>no</td>
|
784
|
+
<td>yes</td>
|
785
|
+
</tr>
|
786
|
+
<tr>
|
787
|
+
<td><code>:nth-of-type</code></td>
|
788
|
+
<td>no</td>
|
789
|
+
<td>yes</td>
|
790
|
+
</tr>
|
791
|
+
<tr>
|
792
|
+
<td><code>:nth-last-of-type</code></td>
|
793
|
+
<td>no</td>
|
794
|
+
<td>yes</td>
|
795
|
+
</tr>
|
796
|
+
<tr>
|
797
|
+
<td><code>:first-child</code></td>
|
798
|
+
<td>no</td>
|
799
|
+
<td>yes</td>
|
800
|
+
</tr>
|
801
|
+
<tr>
|
802
|
+
<td><code>:last-child</code></td>
|
803
|
+
<td>no</td>
|
804
|
+
<td>yes</td>
|
805
|
+
</tr>
|
806
|
+
<tr>
|
807
|
+
<td><code>:first-of-type</code></td>
|
808
|
+
<td>no</td>
|
809
|
+
<td>yes</td>
|
810
|
+
</tr>
|
811
|
+
<tr>
|
812
|
+
<td><code>:last-of-type</code></td>
|
813
|
+
<td>no</td>
|
814
|
+
<td>yes</td>
|
815
|
+
</tr>
|
816
|
+
<tr>
|
817
|
+
<td><code>:only-child</code></td>
|
818
|
+
<td>no</td>
|
819
|
+
<td>yes</td>
|
820
|
+
</tr>
|
821
|
+
<tr>
|
822
|
+
<td><code>:only-of-type</code></td>
|
823
|
+
<td>no</td>
|
824
|
+
<td>yes</td>
|
825
|
+
</tr>
|
826
|
+
<tr>
|
827
|
+
<td><code>:empty</code></td>
|
828
|
+
<td>no</td>
|
829
|
+
<td>yes</td>
|
830
|
+
</tr>
|
831
|
+
<tr>
|
832
|
+
<td><code>:not</code></td>
|
833
|
+
<td>yes</td>
|
834
|
+
<td>yes</td>
|
835
|
+
</tr>
|
836
|
+
</tbody>
|
837
|
+
</table>
|
838
|
+
|
839
|
+
#### Combinators
|
840
|
+
|
841
|
+
See more details: [8. Combinators](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#combinators)
|
842
|
+
|
843
|
+
<table>
|
844
|
+
<thead>
|
845
|
+
<tr>
|
846
|
+
<th>Syntax</th>
|
847
|
+
<th>Supported</th>
|
848
|
+
<th>Desc</th>
|
849
|
+
</tr>
|
850
|
+
</thead>
|
851
|
+
<tbody>
|
852
|
+
<tr>
|
853
|
+
<td><code>h1 em</code></td>
|
854
|
+
<td>yes</td>
|
855
|
+
<td>Descendant combinator</td>
|
856
|
+
</tr>
|
857
|
+
<tr>
|
858
|
+
<td><code>h1 > em</code></td>
|
859
|
+
<td>yes</td>
|
860
|
+
<td>Child combinator</td>
|
861
|
+
</tr>
|
862
|
+
<tr>
|
863
|
+
<td><code>math + p</code></td>
|
864
|
+
<td>yes</td>
|
865
|
+
<td>Next-sibling combinator</td>
|
866
|
+
</tr>
|
867
|
+
<tr>
|
868
|
+
<td><code>h1 ~ pre</code></td>
|
869
|
+
<td>yes</td>
|
870
|
+
<td>Subsequent-sibling combinator</td>
|
871
|
+
</tr>
|
872
|
+
</tbody>
|
873
|
+
</table>
|
552
874
|
|
553
875
|
## Performance
|
554
876
|
|
@@ -571,6 +893,8 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
571
893
|
|
572
894
|
## Release History
|
573
895
|
|
896
|
+
- v0.3.0
|
897
|
+
- CSS selector support [#11](https://github.com/namusyaka/gammo/pull/11)
|
574
898
|
- v0.2.0
|
575
899
|
- XPath 1.0 support [#4](https://github.com/namusyaka/gammo/pull/4)
|
576
900
|
- v0.1.0
|
data/Rakefile
CHANGED
@@ -25,7 +25,11 @@ task :generate do
|
|
25
25
|
end
|
26
26
|
|
27
27
|
namespace :racc do
|
28
|
-
task :
|
28
|
+
task :xpath do
|
29
29
|
`bundle exec racc lib/gammo/xpath/parser.y -o lib/gammo/xpath/parser.rb`
|
30
30
|
end
|
31
|
+
|
32
|
+
task :css do
|
33
|
+
`bundle exec racc lib/gammo/css_selector/parser.y -o lib/gammo/css_selector/parser.rb`
|
34
|
+
end
|
31
35
|
end
|
data/lib/gammo/attributes.rb
CHANGED
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'gammo/css_selector/node_set'
|
3
|
+
require 'gammo/modules/subclassify'
|
4
|
+
|
5
|
+
module Gammo
|
6
|
+
module CSSSelector
|
7
|
+
module AST
|
8
|
+
# Class for representing combinator defined in the CSS selector specification.
|
9
|
+
# @!visibility private
|
10
|
+
class Combinator
|
11
|
+
extend Subclassify
|
12
|
+
|
13
|
+
# Builds a combinator around the selector that candidate nodes are tested with.
#
# @param selector [Object] stored as-is in @selector for later use by #strain
def initialize(selector)
  @selector = selector
end
|
16
|
+
|
17
|
+
# Evaluates this combinator for the given context.
#
# Hands the context and a freshly created, empty NodeSet to #strain and
# returns whatever #strain returns.
#
# @param context [Object] evaluation context forwarded untouched to #strain
def evaluate(context)
  collected = NodeSet.new
  strain(context, collected)
end
|
20
|
+
|
21
|
+
# Class for representing the descendant combinator.
|
22
|
+
# @!visibility private
|
23
|
+
class Descendant < Combinator
|
24
|
+
declare :descendant
|
25
|
+
|
26
|
+
# Descendant combinator: lets the wrapped selector search below the context.
#
# The selector receives a duplicate of the context (context.dup) together
# with node_set, and node_set itself is returned.
#
# @param context [Object] context that is duplicated before the search
# @param node_set [Object] collection handed to the selector's search
# @return [Object] the same node_set instance
def strain(context, node_set)
  node_set.tap { |found| @selector.search_descendant(context.dup, found) }
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Class for representing the child combinator.
|
33
|
+
# @!visibility private
|
34
|
+
class Child < Combinator
|
35
|
+
declare :child
|
36
|
+
|
37
|
+
# Child combinator: tests each element child of the context node.
#
# Children that are not Node::Element are skipped and do not advance the
# position counter. Every element child is wrapped in a new Context carrying
# its 1-based position among the element children, and is appended to
# node_set when @selector matches that context.
#
# @param context [Object] context whose node supplies the children
# @param node_set [Object] collection receiving the matching children
# @return [Object] the same node_set instance
def strain(context, node_set)
  element_index = 0
  context.node.children.each do |candidate|
    next unless candidate.kind_of?(Node::Element)
    element_index += 1
    child_context = Context.new(node: candidate, position: element_index)
    node_set << candidate if @selector.match?(child_context)
  end
  node_set
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# Class for representing the next-sibling combinator.
|
49
|
+
# @!visibility private
|
50
|
+
class NextSibling < Combinator
|
51
|
+
declare :next_sibling
|
52
|
+
|
53
|
+
# Next-sibling combinator (`+`): tests the first element that follows the
# context node.
#
# Walks forward through next_sibling. For every sibling visited, the shared
# context is temporarily rewritten (position incremented, node replaced).
# The first sibling that is a Node::Element is appended to node_set when
# @selector matches the rewritten context, and the walk stops there.
# The context's original node and position are restored in an `ensure`
# clause, so they are put back even if the selector raises.
#
# @param context [Object] mutable context providing #node and #position
# @param node_set [Object] collection receiving the matching sibling
# @return [Object] the same node_set instance
def strain(context, node_set)
  origin_node = context.node
  origin_position = context.position
  begin
    sibling = origin_node
    while (sibling = sibling.next_sibling)
      # NOTE(review): position advances for every sibling, including
      # non-element nodes - confirm this is the intended numbering.
      context.position += 1
      context.node = sibling
      next unless sibling.is_a?(Node::Element)
      node_set << sibling if @selector.match?(context)
      break
    end
    node_set
  ensure
    context.position = origin_position
    context.node = origin_node
  end
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Class for representing the subsequent-sibling combinator.
|
71
|
+
# @!visibility private
|
72
|
+
class SubsequentSibling < Combinator
|
73
|
+
declare :subsequent_sibling
|
74
|
+
|
75
|
+
# Subsequent-sibling combinator (`~`): tests every element that follows the
# context node.
#
# Walks forward through next_sibling until there are no more siblings. For
# every sibling visited, the shared context is temporarily rewritten
# (position incremented, node replaced). Siblings that are not
# Node::Element are skipped, so text and comment nodes are never offered to
# the selector or collected. Element siblings are appended to node_set when
# @selector matches the rewritten context.
# The context's original node and position are restored in an `ensure`
# clause, so they are put back even if the selector raises.
#
# @param context [Object] mutable context providing #node and #position
# @param node_set [Object] collection receiving the matching siblings
# @return [Object] the same node_set instance
def strain(context, node_set)
  origin_node = context.node
  origin_position = context.position
  begin
    sibling = origin_node
    while (sibling = sibling.next_sibling)
      # NOTE(review): position advances for every sibling, including
      # non-element nodes - confirm this is the intended numbering.
      context.position += 1
      context.node = sibling
      next unless sibling.is_a?(Node::Element)
      node_set << sibling if @selector.match?(context)
    end
    node_set
  ensure
    context.position = origin_position
    context.node = origin_node
  end
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Gammo
|
2
|
+
module CSSSelector
|
3
|
+
module AST
|
4
|
+
module Selector
|
5
|
+
class Attrib
|
6
|
+
attr_accessor :value
|
7
|
+
|
8
|
+
extend Subclassify
|
9
|
+
|
10
|
+
# Builds an attribute selector.
#
# @param key [Object] attribute name; converted with #to_sym when looked up
# @param value [Object] value the attribute is compared against
# @param namespace_prefix [Object, nil] stored in @namespace_prefix
def initialize(key:, value:, namespace_prefix: nil)
  @key, @value, @namespace_prefix = key, value, namespace_prefix
end
|
15
|
+
|
16
|
+
# Abstract matcher; concrete attribute selectors override this.
#
# @param context [Object] evaluation context (unused here)
# @raise [NotImplementedError] always, because the base class cannot match
def match?(context)
  # `NotImplemented` is not a Ruby constant and would surface as a NameError;
  # NotImplementedError is the standard class for an unimplemented method.
  raise NotImplementedError, "#match? must be implemented by sub class"
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
# Looks up this selector's attribute on the given node.
#
# @param node [Object] node responding to #attributes
# @return [Object] the entry of node.attributes stored under @key.to_sym
def attrib_value(node)
  attrs = node.attributes
  attrs[@key.to_sym]
end
|
25
|
+
|
26
|
+
class Equal < Attrib
|
27
|
+
declare :equal
|
28
|
+
|
29
|
+
# `[att=val]`: true when the attribute value equals @value.
#
# @param context [Object] context whose node is inspected
# @return [Boolean] result of comparing the attribute value with @value
def match?(context)
  actual = attrib_value(context.node)
  actual == @value
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class PrefixMatch < Attrib
|
35
|
+
declare :prefix_match
|
36
|
+
|
37
|
+
# Prefix match: true when the attribute value starts with @value.
#
# Never matches when @value is nil or empty, or when the node has no value
# for the attribute.
#
# @param context [Object] context whose node is inspected
# @return [Boolean]
def match?(context)
  return false unless @value && !@value.empty?
  actual = attrib_value(context.node)
  actual ? actual.start_with?(@value) : false
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class SuffixMatch < Attrib
|
45
|
+
declare :suffix_match
|
46
|
+
|
47
|
+
# Suffix match: true when the attribute value ends with @value.
#
# Never matches when @value is nil or empty, or when the node has no value
# for the attribute.
#
# @param context [Object] context whose node is inspected
# @return [Boolean]
def match?(context)
  return false unless @value && !@value.empty?
  actual = attrib_value(context.node)
  actual ? actual.end_with?(@value) : false
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class SubstringMatch < Attrib
|
55
|
+
declare :substring_match
|
56
|
+
|
57
|
+
# Substring match: true when the attribute value contains @value.
#
# Never matches when @value is nil or empty, or when the node has no value
# for the attribute.
#
# @param context [Object] context whose node is inspected
# @return [Boolean]
def match?(context)
  return false unless @value && !@value.empty?
  actual = attrib_value(context.node)
  actual ? actual.include?(@value) : false
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class DashMatch < Attrib
|
65
|
+
declare :dash_match
|
66
|
+
|
67
|
+
# `[att|=val]`: true when the attribute value is exactly @value or starts
# with @value immediately followed by "-".
#
# A node without the attribute never matches; previously a missing attribute
# was coerced to "" and therefore matched an empty @value. A nil @value
# returns false instead of raising TypeError from start_with?.
#
# @param context [Object] context whose node is inspected
# @return [Boolean]
def match?(context)
  return false if @value.nil?
  actual = attrib_value(context.node)
  return false unless actual
  actual == @value || actual.start_with?("#{@value}-")
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class Includes < Attrib
|
74
|
+
declare :includes
|
75
|
+
|
76
|
+
# `[att~=val]`: true when the attribute value, read as a whitespace-separated
# list of words, contains a word that is exactly @value.
#
# Never matches when @value is nil, empty, or itself contains whitespace
# (a single word can never equal it), or when the node has no value for the
# attribute.
#
# @param context [Object] context whose node is inspected
# @return [Boolean]
def match?(context)
  return false if !@value || @value.empty?
  # A value containing whitespace cannot be one word of the list; the former
  # whole-string comparison wrongly let it match.
  return false if @value =~ /\s/
  actual = attrib_value(context.node)
  return false unless actual
  actual.split(/\s+/).include?(@value)
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|