gammo 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +32 -0
- data/Gemfile.lock +6 -6
- data/README.md +334 -10
- data/Rakefile +5 -1
- data/lib/gammo/attributes.rb +5 -0
- data/lib/gammo/css_selector/ast/combinator.rb +92 -0
- data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
- data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
- data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
- data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
- data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
- data/lib/gammo/css_selector/ast/selector.rb +100 -0
- data/lib/gammo/css_selector/context.rb +17 -0
- data/lib/gammo/css_selector/errors.rb +6 -0
- data/lib/gammo/css_selector/node_set.rb +44 -0
- data/lib/gammo/css_selector/parser.rb +790 -0
- data/lib/gammo/css_selector/parser.y +321 -0
- data/lib/gammo/css_selector.rb +33 -0
- data/lib/gammo/modules/subclassify.rb +31 -0
- data/lib/gammo/node.rb +2 -0
- data/lib/gammo/parser/foreign.rb +3 -3
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
- data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
- data/lib/gammo/parser/insertion_mode/text.rb +1 -1
- data/lib/gammo/parser/insertion_mode.rb +1 -1
- data/lib/gammo/tokenizer/tokens.rb +10 -1
- data/lib/gammo/tokenizer.rb +10 -10
- data/lib/gammo/version.rb +1 -1
- data/lib/gammo/xpath/ast/axis.rb +1 -1
- data/lib/gammo/xpath/ast/expression.rb +2 -0
- data/lib/gammo/xpath/ast/function.rb +1 -1
- data/lib/gammo/xpath/ast/node_test.rb +1 -1
- data/lib/gammo/xpath/ast/path.rb +1 -0
- data/lib/gammo/xpath.rb +4 -5
- metadata +17 -4
- data/.travis.yml +0 -6
- data/lib/gammo/xpath/ast/subclassify.rb +0 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8fc3eae3f0b1cbe125012fef023b8796430f699f1bcac5a8336770070346315
|
4
|
+
data.tar.gz: ec33fbc6c045d1b458544ecbec141939a301fdce1ce9dd46542ba61dae5e5d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a6f7dda7321a8cebba91755efe435a5207e95be939b87c8abccdf5b177730ac402975bf52572026d0776b9cc3ab8852e873099b9013e41c1222f9ba496a9085
|
7
|
+
data.tar.gz: 8cbe7ea25c29514dca39ded2407bee031bec9ecf2a02a06c82066ca34248b1ecb885e4d392c50801d1a874b13f282c8a5fc715220acbbde582e421c1bcce2814
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Testing
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches:
|
13
|
+
- master
|
14
|
+
pull_request:
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
name: Test with Ruby-${{ matrix.ruby }}
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby: ['2.4', '2.5', '2.6', '2.7', '3.0'] # quoted: an unquoted 3.0 is a YAML float and would be read as 3
|
23
|
+
steps:
|
24
|
+
- uses: actions/checkout@v2
|
25
|
+
- uses: ruby/setup-ruby@v1
|
26
|
+
with:
|
27
|
+
ruby-version: ${{ matrix.ruby }}
|
28
|
+
bundler-cache: true
|
29
|
+
- name: Install dependencies
|
30
|
+
run: bundle install
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake
|
data/Gemfile.lock
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
gammo (0.
|
4
|
+
gammo (0.2.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
docile (1.3.2)
|
10
10
|
erubi (1.9.0)
|
11
|
-
power_assert (1.
|
11
|
+
power_assert (1.2.0)
|
12
12
|
racc (1.5.0)
|
13
13
|
rake (12.3.3)
|
14
14
|
simplecov (0.18.5)
|
15
15
|
docile (~> 1.1)
|
16
16
|
simplecov-html (~> 0.11)
|
17
|
-
simplecov-html (0.12.
|
18
|
-
test-unit (3.3.
|
17
|
+
simplecov-html (0.12.3)
|
18
|
+
test-unit (3.3.6)
|
19
19
|
power_assert
|
20
|
-
yard (0.9.
|
20
|
+
yard (0.9.25)
|
21
21
|
|
22
22
|
PLATFORMS
|
23
23
|
ruby
|
@@ -32,4 +32,4 @@ DEPENDENCIES
|
|
32
32
|
yard
|
33
33
|
|
34
34
|
BUNDLED WITH
|
35
|
-
2.
|
35
|
+
2.2.3
|
data/README.md
CHANGED
@@ -7,7 +7,8 @@
|
|
7
7
|
[](https://github.com/namusyaka/gammo/blob/master/LICENSE.txt)
|
8
8
|
[](http://www.rubydoc.info/gems/gammo/frames)
|
9
9
|
|
10
|
-
Gammo
|
10
|
+
Gammo provides a pure Ruby HTML5-compliant parser and CSS selector / XPath support for traversing the DOM tree built by Gammo.
|
11
|
+
The implementation of the HTML5 parsing algorithm in Gammo conforms to [the WHATWG specification](https://html.spec.whatwg.org/multipage/parsing.html). Given an HTML string, Gammo parses it and builds a DOM tree based on the tokenization and tree-construction algorithms defined in the WHATWG parsing algorithm. These implementations are provided without any external dependencies.
|
11
12
|
|
12
13
|
Gammo's name is inspired by [Gumbo](https://github.com/google/gumbo-parser). But Gammo is a fried tofu fritter made with vegetables.
|
13
14
|
|
@@ -15,10 +16,45 @@ Gammo, its naming is inspired by [Gumbo](https://github.com/google/gumbo-parser)
|
|
15
16
|
require 'gammo'
|
16
17
|
require 'open-uri'
|
17
18
|
|
18
|
-
parser = open('https://google.com') { |f| Gammo.new(f.read) }
|
19
|
-
parser.parse #=> #<Gammo::Node::Document>
|
19
|
+
parser = URI.open('https://google.com') { |f| Gammo.new(f.read) }
|
20
|
+
document = parser.parse #=> #<Gammo::Node::Document>
|
21
|
+
|
22
|
+
puts document.css('title').first.inner_text #=> 'Google'
|
20
23
|
```
|
21
24
|
|
25
|
+
* [Overview](#overview)
|
26
|
+
* [Features](#features)
|
27
|
+
* [Tokenization](#tokenizaton)
|
28
|
+
* [Token types](#token-types)
|
29
|
+
* [Parsing](#parsing)
|
30
|
+
* [Notes](#notes)
|
31
|
+
* [Node](#node)
|
32
|
+
* [DOM Tree Traversal](#dom-tree-traversal)
|
33
|
+
* [XPath 1.0 (experimental)](#xpath-10-experimental)
|
34
|
+
* [Example](#example)
|
35
|
+
* [Axis Specifiers](#axis-specifiers)
|
36
|
+
* [Node Test](#node-test)
|
37
|
+
* [Operators](#operators)
|
38
|
+
* [Functions](#functions)
|
39
|
+
* [Node set functions](#node-set-functions)
|
40
|
+
* [String Functions](#string-functions)
|
41
|
+
* [Boolean Functions](#boolean-functions)
|
42
|
+
* [Number Functions](#number-functions)
|
43
|
+
* [CSS Selector (experimental)](#css-selector-experimental)
|
44
|
+
* [Example](#example)
|
45
|
+
* [Groups of selectors](#groups-of-selectors)
|
46
|
+
* [Simple selectors](#simple-selectors)
|
47
|
+
* [Type selector & Universal selector](#type-selector--universal-selector)
|
48
|
+
* [Attribute selectors](#attribute-selectors)
|
49
|
+
* [Class selectors](#class-selectors)
|
50
|
+
* [ID selectors](#id-selectors)
|
51
|
+
* [Pseudo-classes](#pseudo-classes)
|
52
|
+
* [Combinators](#combinators)
|
53
|
+
* [Performance](#performance)
|
54
|
+
* [References](#references)
|
55
|
+
* [License](#license)
|
56
|
+
* [Release History](#release-history)
|
57
|
+
|
22
58
|
## Overview
|
23
59
|
|
24
60
|
### Features
|
@@ -26,7 +62,7 @@ parser.parse #=> #<Gammo::Node::Document>
|
|
26
62
|
- [Tokenization](#tokenization): Gammo has a tokenizer for implementing [the tokenization algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tokenization).
|
27
63
|
- [Parsing](#parsing): Gammo provides a parser which implements the parsing algorithm by the above tokenization and [the tree-construction algorithm](https://html.spec.whatwg.org/multipage/parsing.html#tree-construction).
|
28
64
|
- [Node](#node): Gammo provides the nodes which implement [WHATWG DOM specification](https://dom.spec.whatwg.org/) partially.
|
29
|
-
- [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal.
|
65
|
+
- [DOM Tree Traversal](#dom-tree-traversal): Gammo provides a way of DOM tree traversal (CSS selector / XPath).
|
30
66
|
- [Performance](#performance): Gammo does not prioritize performance, and there are a few potential performance notes.
|
31
67
|
|
32
68
|
## Tokenizaton
|
@@ -165,8 +201,7 @@ For some nodes such as `Gammo::Node::Element` and `Gammo::Node::Document`, they
|
|
165
201
|
|
166
202
|
## DOM Tree Traversal
|
167
203
|
|
168
|
-
|
169
|
-
CSS selector support is also planned but not having any ETA.
|
204
|
+
CSS selectors and XPath 1.0 are the ways to traverse the DOM tree built by Gammo.
|
170
205
|
|
171
206
|
### XPath 1.0 (experimental)
|
172
207
|
|
@@ -346,12 +381,12 @@ Node tests consist of specific node names or more general expressions. Although
|
|
346
381
|
<tr>
|
347
382
|
<td><code>text()</code></td>
|
348
383
|
<td>yes</td>
|
349
|
-
<td>Finds a node of type text, e.g. <code>hello</code> in <code
|
384
|
+
<td>Finds a node of type text, e.g. <code>hello</code> in <code>&lt;p&gt;hello &lt;a href="https://hello"&gt;world&lt;/a&gt;&lt;/p&gt;</code></td>
|
350
385
|
</tr>
|
351
386
|
<tr>
|
352
387
|
<td><code>comment()</code></td>
|
353
388
|
<td>yes</td>
|
354
|
-
<td>Finds a node of type comment, e.g. <code
|
389
|
+
<td>Finds a node of type comment, e.g. <code>&lt;!-- comment --&gt;</code></td>
|
355
390
|
</tr>
|
356
391
|
<tr>
|
357
392
|
<td><code>node()</code></td>
|
@@ -546,9 +581,296 @@ XPath 1.0 defines four data types (nodeset, string, number, boolean) and there a
|
|
546
581
|
</tbody>
|
547
582
|
</table>
|
548
583
|
|
549
|
-
### CSS Selector
|
584
|
+
### CSS Selector (experimental)
|
585
|
+
|
586
|
+
Gammo has its own lexer/parser for CSS selectors; it is provided as a helper for the DOM tree built by Gammo.
|
587
|
+
Here is a simple example:
|
588
|
+
|
589
|
+
```ruby
|
590
|
+
document = Gammo.new('<!doctype html><input type="button">').parse
|
591
|
+
node_set = document.css('input[type="button"]') #=> "<Gammo::CSSSelector::NodeSet>"
|
592
|
+
|
593
|
+
node_set.length #=> 1
|
594
|
+
node_set.first #=> "<Gammo::Node::Element>"
|
595
|
+
```
|
596
|
+
|
597
|
+
Since this is implemented from scratch, Gammo provides this support as a very experimental feature. Please file an issue if you find bugs.
|
598
|
+
|
599
|
+
#### Example
|
600
|
+
|
601
|
+
Before proceeding to the details of CSS selector support, let's have a look at a few simple examples. Given a sample HTML text and its DOM tree:
|
602
|
+
|
603
|
+
```ruby
|
604
|
+
document = Gammo.new(<<-EOS).parse
|
605
|
+
<!DOCTYPE html>
|
606
|
+
<html>
|
607
|
+
<head>
|
608
|
+
</head>
|
609
|
+
<body>
|
610
|
+
<h1>namusyaka.com</h1>
|
611
|
+
<p class="description">Here is a sample web site.</p>
|
612
|
+
<ul>
|
613
|
+
<li>hello</li>
|
614
|
+
<li>world</li>
|
615
|
+
</ul>
|
616
|
+
<ul id="links">
|
617
|
+
<li>Google <a href="https://google.com/">google.com</a></li>
|
618
|
+
<li>GitHub <a href="https://github.com/namusyaka">github.com/namusyaka</a></li>
|
619
|
+
</ul>
|
620
|
+
</body>
|
621
|
+
</html>
|
622
|
+
EOS
|
623
|
+
```
|
624
|
+
|
625
|
+
The following CSS selector gets all `li` elements and prints their text contents:
|
626
|
+
|
627
|
+
```ruby
|
628
|
+
document.css('li').each do |elm|
|
629
|
+
puts elm.inner_text
|
630
|
+
end
|
631
|
+
```
|
632
|
+
|
633
|
+
The following CSS selector gets all `li` elements under the `ul` element having the `id=links` attribute:
|
634
|
+
|
635
|
+
```ruby
|
636
|
+
document.css('ul#links li').each do |elm|
|
637
|
+
puts elm.inner_text
|
638
|
+
end
|
639
|
+
```
|
640
|
+
|
641
|
+
#### Groups of selectors
|
642
|
+
|
643
|
+
Gammo supports [groups of selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#grouping). This means you can use `,` to traverse the DOM tree by multiple selectors.
|
550
644
|
|
551
|
-
|
645
|
+
```ruby
|
646
|
+
require 'gammo'
|
647
|
+
|
648
|
+
@doc = Gammo.new(<<-EOS).parse
|
649
|
+
<!DOCTYPE html>
|
650
|
+
<html>
|
651
|
+
<head>
|
652
|
+
<title>hello</title>
|
653
|
+
<meta charset="utf8">
|
654
|
+
</head>
|
655
|
+
<body>
|
656
|
+
<p id="hello">hello</p>
|
657
|
+
<p id="world">world</p>
|
658
|
+
EOS
|
659
|
+
|
660
|
+
@doc.css('#hello, #world').map(&:inner_text).join(' ') #=> 'hello world'
|
661
|
+
```
|
662
|
+
|
663
|
+
#### Simple selectors
|
664
|
+
|
665
|
+
##### Type selector & Universal selector
|
666
|
+
|
667
|
+
Gammo supports the basic grammar of type selector and universal selector, but not namespaces.
|
668
|
+
|
669
|
+
##### Attribute selectors
|
670
|
+
|
671
|
+
See more details: [6.3. Attribute selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#attribute-selectors)
|
672
|
+
|
673
|
+
<table>
|
674
|
+
<thead>
|
675
|
+
<tr>
|
676
|
+
<th>Syntax</th>
|
677
|
+
<th>Supported</th>
|
678
|
+
</tr>
|
679
|
+
</thead>
|
680
|
+
<tbody>
|
681
|
+
<tr>
|
682
|
+
<td><code>[att]</code></td>
|
683
|
+
<td>yes</td>
|
684
|
+
</tr>
|
685
|
+
<tr>
|
686
|
+
<td><code>[att=val]</code></td>
|
687
|
+
<td>yes</td>
|
688
|
+
</tr>
|
689
|
+
<tr>
|
690
|
+
<td><code>[att~=val]</code></td>
|
691
|
+
<td>yes</td>
|
692
|
+
</tr>
|
693
|
+
<tr>
|
694
|
+
<td><code>[att|=val]</code></td>
|
695
|
+
<td>yes</td>
|
696
|
+
</tr>
|
697
|
+
</tbody>
|
698
|
+
</table>
|
699
|
+
|
700
|
+
##### Class selectors
|
701
|
+
|
702
|
+
Supported. See more details: [6.4. Class selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#class-html)
|
703
|
+
|
704
|
+
##### ID selectors
|
705
|
+
|
706
|
+
Supported. See more details: [6.5. ID selectors](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#id-selectors)
|
707
|
+
|
708
|
+
##### Pseudo-classes
|
709
|
+
|
710
|
+
Partially supported. See the table below.
|
711
|
+
|
712
|
+
<table>
|
713
|
+
<thead>
|
714
|
+
<tr>
|
715
|
+
<th>Class name</th>
|
716
|
+
<th>Supported</th>
|
717
|
+
<th>Can support?</th>
|
718
|
+
</tr>
|
719
|
+
</thead>
|
720
|
+
<tbody>
|
721
|
+
<tr>
|
722
|
+
<td><code>:link</code></td>
|
723
|
+
<td>no</td>
|
724
|
+
<td>no</td>
|
725
|
+
</tr>
|
726
|
+
<tr>
|
727
|
+
<td><code>:visited</code></td>
|
728
|
+
<td>no</td>
|
729
|
+
<td>no</td>
|
730
|
+
</tr>
|
731
|
+
<tr>
|
732
|
+
<td><code>:hover</code></td>
|
733
|
+
<td>no</td>
|
734
|
+
<td>no</td>
|
735
|
+
</tr>
|
736
|
+
<tr>
|
737
|
+
<td><code>:active</code></td>
|
738
|
+
<td>no</td>
|
739
|
+
<td>no</td>
|
740
|
+
</tr>
|
741
|
+
<tr>
|
742
|
+
<td><code>:focus</code></td>
|
743
|
+
<td>no</td>
|
744
|
+
<td>no</td>
|
745
|
+
</tr>
|
746
|
+
<tr>
|
747
|
+
<td><code>:target</code></td>
|
748
|
+
<td>no</td>
|
749
|
+
<td>no</td>
|
750
|
+
</tr>
|
751
|
+
<tr>
|
752
|
+
<td><code>:lang</code></td>
|
753
|
+
<td>no</td>
|
754
|
+
<td>yes</td>
|
755
|
+
</tr>
|
756
|
+
<tr>
|
757
|
+
<td><code>:enabled</code></td>
|
758
|
+
<td>yes</td>
|
759
|
+
<td>yes</td>
|
760
|
+
</tr>
|
761
|
+
<tr>
|
762
|
+
<td><code>:disabled</code></td>
|
763
|
+
<td>yes</td>
|
764
|
+
<td>yes</td>
|
765
|
+
</tr>
|
766
|
+
<tr>
|
767
|
+
<td><code>:checked</code></td>
|
768
|
+
<td>yes</td>
|
769
|
+
<td>yes</td>
|
770
|
+
</tr>
|
771
|
+
<tr>
|
772
|
+
<td><code>:root</code></td>
|
773
|
+
<td>yes</td>
|
774
|
+
<td>yes</td>
|
775
|
+
</tr>
|
776
|
+
<tr>
|
777
|
+
<td><code>:nth-child</code></td>
|
778
|
+
<td>yes</td>
|
779
|
+
<td>yes</td>
|
780
|
+
</tr>
|
781
|
+
<tr>
|
782
|
+
<td><code>:nth-last-child</code></td>
|
783
|
+
<td>no</td>
|
784
|
+
<td>yes</td>
|
785
|
+
</tr>
|
786
|
+
<tr>
|
787
|
+
<td><code>:nth-of-type</code></td>
|
788
|
+
<td>no</td>
|
789
|
+
<td>yes</td>
|
790
|
+
</tr>
|
791
|
+
<tr>
|
792
|
+
<td><code>:nth-last-of-type</code></td>
|
793
|
+
<td>no</td>
|
794
|
+
<td>yes</td>
|
795
|
+
</tr>
|
796
|
+
<tr>
|
797
|
+
<td><code>:first-child</code></td>
|
798
|
+
<td>no</td>
|
799
|
+
<td>yes</td>
|
800
|
+
</tr>
|
801
|
+
<tr>
|
802
|
+
<td><code>:last-child</code></td>
|
803
|
+
<td>no</td>
|
804
|
+
<td>yes</td>
|
805
|
+
</tr>
|
806
|
+
<tr>
|
807
|
+
<td><code>:first-of-type</code></td>
|
808
|
+
<td>no</td>
|
809
|
+
<td>yes</td>
|
810
|
+
</tr>
|
811
|
+
<tr>
|
812
|
+
<td><code>:last-of-type</code></td>
|
813
|
+
<td>no</td>
|
814
|
+
<td>yes</td>
|
815
|
+
</tr>
|
816
|
+
<tr>
|
817
|
+
<td><code>:only-child</code></td>
|
818
|
+
<td>no</td>
|
819
|
+
<td>yes</td>
|
820
|
+
</tr>
|
821
|
+
<tr>
|
822
|
+
<td><code>:only-of-type</code></td>
|
823
|
+
<td>no</td>
|
824
|
+
<td>yes</td>
|
825
|
+
</tr>
|
826
|
+
<tr>
|
827
|
+
<td><code>:empty</code></td>
|
828
|
+
<td>no</td>
|
829
|
+
<td>yes</td>
|
830
|
+
</tr>
|
831
|
+
<tr>
|
832
|
+
<td><code>:not</code></td>
|
833
|
+
<td>yes</td>
|
834
|
+
<td>yes</td>
|
835
|
+
</tr>
|
836
|
+
</tbody>
|
837
|
+
</table>
|
838
|
+
|
839
|
+
#### Combinators
|
840
|
+
|
841
|
+
See more details: [8. Combinators](https://www.w3.org/TR/2018/REC-selectors-3-20181106/#combinators)
|
842
|
+
|
843
|
+
<table>
|
844
|
+
<thead>
|
845
|
+
<tr>
|
846
|
+
<th>Syntax</th>
|
847
|
+
<th>Supported</th>
|
848
|
+
<th>Desc</th>
|
849
|
+
</tr>
|
850
|
+
</thead>
|
851
|
+
<tbody>
|
852
|
+
<tr>
|
853
|
+
<td><code>h1 em</code></td>
|
854
|
+
<td>yes</td>
|
855
|
+
<td>Descendant combinator</td>
|
856
|
+
</tr>
|
857
|
+
<tr>
|
858
|
+
<td><code>h1 > em</code></td>
|
859
|
+
<td>yes</td>
|
860
|
+
<td>Child combinator</td>
|
861
|
+
</tr>
|
862
|
+
<tr>
|
863
|
+
<td><code>math + p</code></td>
|
864
|
+
<td>yes</td>
|
865
|
+
<td>Next-sibling combinator</td>
|
866
|
+
</tr>
|
867
|
+
<tr>
|
868
|
+
<td><code>h1 ~ pre</code></td>
|
869
|
+
<td>yes</td>
|
870
|
+
<td>Subsequent-sibling combinator</td>
|
871
|
+
</tr>
|
872
|
+
</tbody>
|
873
|
+
</table>
|
552
874
|
|
553
875
|
## Performance
|
554
876
|
|
@@ -571,6 +893,8 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
571
893
|
|
572
894
|
## Release History
|
573
895
|
|
896
|
+
- v0.3.0
|
897
|
+
- CSS selector support [#11](https://github.com/namusyaka/gammo/pull/11)
|
574
898
|
- v0.2.0
|
575
899
|
- XPath 1.0 support [#4](https://github.com/namusyaka/gammo/pull/4)
|
576
900
|
- v0.1.0
|
data/Rakefile
CHANGED
@@ -25,7 +25,11 @@ task :generate do
|
|
25
25
|
end
|
26
26
|
|
27
27
|
namespace :racc do
|
28
|
-
task :
|
28
|
+
task :xpath do
|
29
29
|
`bundle exec racc lib/gammo/xpath/parser.y -o lib/gammo/xpath/parser.rb`
|
30
30
|
end
|
31
|
+
|
32
|
+
task :css do
|
33
|
+
`bundle exec racc lib/gammo/css_selector/parser.y -o lib/gammo/css_selector/parser.rb`
|
34
|
+
end
|
31
35
|
end
|
data/lib/gammo/attributes.rb
CHANGED
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'gammo/css_selector/node_set'
|
3
|
+
require 'gammo/modules/subclassify'
|
4
|
+
|
5
|
+
module Gammo
|
6
|
+
module CSSSelector
|
7
|
+
module AST
|
8
|
+
# Class for representing combinator defined in the CSS selector specification.
|
9
|
+
# @!visibility private
|
10
|
+
class Combinator
|
11
|
+
extend Subclassify
|
12
|
+
|
13
|
+
def initialize(selector)
|
14
|
+
@selector = selector
|
15
|
+
end
|
16
|
+
|
17
|
+
def evaluate(context)
|
18
|
+
strain context, NodeSet.new
|
19
|
+
end
|
20
|
+
|
21
|
+
# Class for representing the descendant combinator.
|
22
|
+
# @!visibility private
|
23
|
+
class Descendant < Combinator
|
24
|
+
declare :descendant
|
25
|
+
|
26
|
+
def strain(context, node_set)
|
27
|
+
@selector.search_descendant(context.dup, node_set)
|
28
|
+
node_set
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Class for representing the child combinator.
|
33
|
+
# @!visibility private
|
34
|
+
class Child < Combinator
|
35
|
+
declare :child
|
36
|
+
|
37
|
+
def strain(context, node_set)
|
38
|
+
context.node.children.inject(0) do |i, child|
|
39
|
+
next i unless child.kind_of?(Node::Element)
|
40
|
+
i += 1
|
41
|
+
node_set << child if @selector.match?(Context.new(node: child, position: i))
|
42
|
+
i
|
43
|
+
end
|
44
|
+
node_set
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# Class for representing the next-sibling combinator.
|
49
|
+
# @!visibility private
|
50
|
+
class NextSibling < Combinator
|
51
|
+
declare :next_sibling
|
52
|
+
|
53
|
+
def strain(context, node_set)
|
54
|
+
node = context.node
|
55
|
+
context_position = context.position
|
56
|
+
context_node = context.node
|
57
|
+
while node = node.next_sibling
|
58
|
+
context.position += 1
|
59
|
+
context.node = node
|
60
|
+
next unless node.is_a?(Node::Element)
|
61
|
+
node_set << node if @selector.match?(context)
|
62
|
+
break
|
63
|
+
end
|
64
|
+
context.position = context_position
|
65
|
+
context.node = context_node
|
66
|
+
node_set
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Class for representing the subsequent-sibling combinator.
|
71
|
+
# @!visibility private
|
72
|
+
class SubsequentSibling < Combinator
|
73
|
+
declare :subsequent_sibling
|
74
|
+
|
75
|
+
def strain(context, node_set)
|
76
|
+
node = context.node
|
77
|
+
context_node = context.node
|
78
|
+
position = context.position
|
79
|
+
while node = node.next_sibling
|
80
|
+
context.position += 1
|
81
|
+
context.node = node
|
82
|
+
# Selectors only ever match elements; skip text/comment siblings, as the Child and NextSibling combinators do.
node_set << node if node.is_a?(Node::Element) && @selector.match?(context)
|
83
|
+
end
|
84
|
+
context.position = position
|
85
|
+
context.node = context_node
|
86
|
+
node_set
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Gammo
|
2
|
+
module CSSSelector
|
3
|
+
module AST
|
4
|
+
module Selector
|
5
|
+
class Attrib
|
6
|
+
attr_accessor :value
|
7
|
+
|
8
|
+
extend Subclassify
|
9
|
+
|
10
|
+
def initialize(key:, value:, namespace_prefix: nil)
|
11
|
+
@key = key
|
12
|
+
@value = value
|
13
|
+
@namespace_prefix = namespace_prefix
|
14
|
+
end
|
15
|
+
|
16
|
+
def match?(context)
|
17
|
+
# Abstract hook: NotImplementedError is the Ruby exception for this (NotImplemented is not a core constant).
raise NotImplementedError, "#match? must be implemented by sub class"
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def attrib_value(node)
|
23
|
+
node.attributes[@key.to_sym]
|
24
|
+
end
|
25
|
+
|
26
|
+
class Equal < Attrib
|
27
|
+
declare :equal
|
28
|
+
|
29
|
+
def match?(context)
|
30
|
+
attrib_value(context.node) == @value
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class PrefixMatch < Attrib
|
35
|
+
declare :prefix_match
|
36
|
+
|
37
|
+
def match?(context)
|
38
|
+
return false if !@value || @value.empty?
|
39
|
+
return false unless val = attrib_value(context.node)
|
40
|
+
val.start_with?(@value)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class SuffixMatch < Attrib
|
45
|
+
declare :suffix_match
|
46
|
+
|
47
|
+
def match?(context)
|
48
|
+
return false if !@value || @value.empty?
|
49
|
+
return false unless val = attrib_value(context.node)
|
50
|
+
val.end_with?(@value)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class SubstringMatch < Attrib
|
55
|
+
declare :substring_match
|
56
|
+
|
57
|
+
def match?(context)
|
58
|
+
return false if !@value || @value.empty?
|
59
|
+
return false unless val = attrib_value(context.node)
|
60
|
+
val.include?(@value)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class DashMatch < Attrib
|
65
|
+
declare :dash_match
|
66
|
+
|
67
|
+
def match?(context)
|
68
|
+
# [att|=val] requires the attribute to be present; do not treat a missing attribute as an empty string.
return false unless (val = attrib_value(context.node))
|
69
|
+
val == @value || (val.start_with?(@value) && val[@value.length] == ?-)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class Includes < Attrib
|
74
|
+
declare :includes
|
75
|
+
|
76
|
+
def match?(context)
|
77
|
+
return false if !@value || @value.empty?
|
78
|
+
val = attrib_value(context.node) || ''
|
79
|
+
val == @value || (val.split(/\s/).include?(@value))
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|