gammo 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +32 -0
- data/Gemfile.lock +6 -6
- data/README.md +334 -10
- data/Rakefile +5 -1
- data/lib/gammo/attributes.rb +5 -0
- data/lib/gammo/css_selector/ast/combinator.rb +92 -0
- data/lib/gammo/css_selector/ast/selector/attrib_selector.rb +86 -0
- data/lib/gammo/css_selector/ast/selector/class_selector.rb +19 -0
- data/lib/gammo/css_selector/ast/selector/id_selector.rb +18 -0
- data/lib/gammo/css_selector/ast/selector/negation.rb +21 -0
- data/lib/gammo/css_selector/ast/selector/pseudo_class.rb +92 -0
- data/lib/gammo/css_selector/ast/selector.rb +100 -0
- data/lib/gammo/css_selector/context.rb +17 -0
- data/lib/gammo/css_selector/errors.rb +6 -0
- data/lib/gammo/css_selector/node_set.rb +44 -0
- data/lib/gammo/css_selector/parser.rb +790 -0
- data/lib/gammo/css_selector/parser.y +321 -0
- data/lib/gammo/css_selector.rb +33 -0
- data/lib/gammo/modules/subclassify.rb +31 -0
- data/lib/gammo/node.rb +2 -0
- data/lib/gammo/parser/foreign.rb +3 -3
- data/lib/gammo/parser/insertion_mode/after_after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/after_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_head.rb +1 -1
- data/lib/gammo/parser/insertion_mode/before_html.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_body.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_column_group.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_frameset.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_head.rb +3 -2
- data/lib/gammo/parser/insertion_mode/in_head_noscript.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_select.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_table.rb +1 -1
- data/lib/gammo/parser/insertion_mode/in_template.rb +1 -1
- data/lib/gammo/parser/insertion_mode/initial.rb +1 -1
- data/lib/gammo/parser/insertion_mode/text.rb +1 -1
- data/lib/gammo/parser/insertion_mode.rb +1 -1
- data/lib/gammo/tokenizer/tokens.rb +10 -1
- data/lib/gammo/tokenizer.rb +10 -10
- data/lib/gammo/version.rb +1 -1
- data/lib/gammo/xpath/ast/axis.rb +1 -1
- data/lib/gammo/xpath/ast/expression.rb +2 -0
- data/lib/gammo/xpath/ast/function.rb +1 -1
- data/lib/gammo/xpath/ast/node_test.rb +1 -1
- data/lib/gammo/xpath/ast/path.rb +1 -0
- data/lib/gammo/xpath.rb +4 -5
- metadata +17 -4
- data/.travis.yml +0 -6
- data/lib/gammo/xpath/ast/subclassify.rb +0 -35
data/lib/gammo/tokenizer.rb
CHANGED
@@ -62,7 +62,7 @@ module Gammo
|
|
62
62
|
if pos < (scanner.pos - 2)
|
63
63
|
scanner.pos -= 2
|
64
64
|
buffer = buffer.slice(0, buffer.length - 2)
|
65
|
-
return
|
65
|
+
return character_token(buffer)
|
66
66
|
end
|
67
67
|
case byte
|
68
68
|
when %r{[a-zA-Z]}
|
@@ -71,7 +71,7 @@ module Gammo
|
|
71
71
|
when ?! then return scan_markup_declaration
|
72
72
|
when ?? then return comment_token(?? + scan_until_close_angle)
|
73
73
|
when ?/
|
74
|
-
return
|
74
|
+
return character_token(buffer) if scanner.eos?
|
75
75
|
# "</>" does not generate a token at all. treat this as empty comment token.
|
76
76
|
return comment_token('') if scan(/>/)
|
77
77
|
# Expects chars like "</a"
|
@@ -89,7 +89,7 @@ module Gammo
|
|
89
89
|
next
|
90
90
|
end
|
91
91
|
end
|
92
|
-
return
|
92
|
+
return character_token(buffer) if pos < scanner.pos
|
93
93
|
EOS
|
94
94
|
end
|
95
95
|
|
@@ -106,7 +106,7 @@ module Gammo
|
|
106
106
|
scan_raw_or_rcdata
|
107
107
|
else
|
108
108
|
@raw = true
|
109
|
-
|
109
|
+
character_token(scan_until(/\z/) || '')
|
110
110
|
end
|
111
111
|
if token && scanner.pos > pos
|
112
112
|
@convert_null = true
|
@@ -145,11 +145,11 @@ module Gammo
|
|
145
145
|
end
|
146
146
|
@raw = raw_tag != 'textarea' && raw_tag != 'title'
|
147
147
|
@raw_tag = ''
|
148
|
-
|
148
|
+
character_token(buffer) unless buffer.empty?
|
149
149
|
end
|
150
150
|
|
151
151
|
def scan_script
|
152
|
-
|
152
|
+
character_token(ScriptScanner.new(scanner, raw_tag: raw_tag).scan)
|
153
153
|
end
|
154
154
|
|
155
155
|
def scan_start_tag
|
@@ -325,7 +325,7 @@ module Gammo
|
|
325
325
|
buffer = ''
|
326
326
|
loop do
|
327
327
|
byte = scanner.get_byte
|
328
|
-
return
|
328
|
+
return character_token(buffer) unless byte
|
329
329
|
buffer << byte
|
330
330
|
case byte
|
331
331
|
when ?]
|
@@ -340,7 +340,7 @@ module Gammo
|
|
340
340
|
brackets = 0
|
341
341
|
end
|
342
342
|
end
|
343
|
-
|
343
|
+
character_token(buffer)
|
344
344
|
end
|
345
345
|
|
346
346
|
RAW_TAGS = ['iframe', 'noembed', 'noframes', 'noscript', 'plaintext', 'script', 'style', 'textarea', 'title', 'xmp'].freeze
|
@@ -350,8 +350,8 @@ module Gammo
|
|
350
350
|
RAW_TAGS.include?(name)
|
351
351
|
end
|
352
352
|
|
353
|
-
def
|
354
|
-
|
353
|
+
def character_token(text)
|
354
|
+
CharacterToken.new(text, raw: raw, convert_null: convert_null)
|
355
355
|
end
|
356
356
|
|
357
357
|
def error_token(pos)
|
data/lib/gammo/version.rb
CHANGED
data/lib/gammo/xpath/ast/axis.rb
CHANGED
data/lib/gammo/xpath/ast/path.rb
CHANGED
data/lib/gammo/xpath.rb
CHANGED
@@ -24,7 +24,7 @@ module Gammo
|
|
24
24
|
# @param [String] expr
|
25
25
|
# @param [Integer] result_type
|
26
26
|
# @!visibility private
|
27
|
-
def initialize(expr
|
27
|
+
def initialize(expr, result_type)
|
28
28
|
@expr = expr
|
29
29
|
@result_type = result_type
|
30
30
|
end
|
@@ -65,10 +65,9 @@ module Gammo
|
|
65
65
|
# @param [Gammo::Node] context_node
|
66
66
|
# @return [String, Integer, TrueClass, FalseClass, Gammo::XPath::NodeSet]
|
67
67
|
def xpath(expr, variables: {}, result_type: UNORDERED_NODE_ITERATOR_TYPE, context_node: self)
|
68
|
-
Traverser.new(
|
69
|
-
|
70
|
-
|
71
|
-
).evaluate(Context.new(node: context_node, variables: variables))
|
68
|
+
Traverser.new(expr, result_type).evaluate(
|
69
|
+
Context.new(node: context_node, variables: variables)
|
70
|
+
)
|
72
71
|
end
|
73
72
|
end
|
74
73
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gammo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- namusyaka
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Gammo is an implementation of the HTML5 parsing algorithm which conforms
|
14
14
|
the WHATWG specification with pure Ruby.
|
@@ -18,8 +18,8 @@ executables: []
|
|
18
18
|
extensions: []
|
19
19
|
extra_rdoc_files: []
|
20
20
|
files:
|
21
|
+
- ".github/workflows/test.yml"
|
21
22
|
- ".gitignore"
|
22
|
-
- ".travis.yml"
|
23
23
|
- Gemfile
|
24
24
|
- Gemfile.lock
|
25
25
|
- LICENSE.txt
|
@@ -29,7 +29,21 @@ files:
|
|
29
29
|
- lib/gammo.rb
|
30
30
|
- lib/gammo/attribute.rb
|
31
31
|
- lib/gammo/attributes.rb
|
32
|
+
- lib/gammo/css_selector.rb
|
33
|
+
- lib/gammo/css_selector/ast/combinator.rb
|
34
|
+
- lib/gammo/css_selector/ast/selector.rb
|
35
|
+
- lib/gammo/css_selector/ast/selector/attrib_selector.rb
|
36
|
+
- lib/gammo/css_selector/ast/selector/class_selector.rb
|
37
|
+
- lib/gammo/css_selector/ast/selector/id_selector.rb
|
38
|
+
- lib/gammo/css_selector/ast/selector/negation.rb
|
39
|
+
- lib/gammo/css_selector/ast/selector/pseudo_class.rb
|
40
|
+
- lib/gammo/css_selector/context.rb
|
41
|
+
- lib/gammo/css_selector/errors.rb
|
42
|
+
- lib/gammo/css_selector/node_set.rb
|
43
|
+
- lib/gammo/css_selector/parser.rb
|
44
|
+
- lib/gammo/css_selector/parser.y
|
32
45
|
- lib/gammo/fragment_parser.rb
|
46
|
+
- lib/gammo/modules/subclassify.rb
|
33
47
|
- lib/gammo/node.rb
|
34
48
|
- lib/gammo/parser.rb
|
35
49
|
- lib/gammo/parser/constants.rb
|
@@ -74,7 +88,6 @@ files:
|
|
74
88
|
- lib/gammo/xpath/ast/function.rb
|
75
89
|
- lib/gammo/xpath/ast/node_test.rb
|
76
90
|
- lib/gammo/xpath/ast/path.rb
|
77
|
-
- lib/gammo/xpath/ast/subclassify.rb
|
78
91
|
- lib/gammo/xpath/ast/value.rb
|
79
92
|
- lib/gammo/xpath/context.rb
|
80
93
|
- lib/gammo/xpath/errors.rb
|
data/.travis.yml
DELETED
data/lib/gammo/xpath/ast/subclassify.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
require 'gammo/xpath/errors'
|
2
|
-
|
3
|
-
module Gammo
|
4
|
-
module XPath
|
5
|
-
module AST
|
6
|
-
# Class for making subclass declarable/fetchable
|
7
|
-
# @!visibility private
|
8
|
-
module Subclassify
|
9
|
-
# @!visibility private
|
10
|
-
def map
|
11
|
-
@map ||= {}
|
12
|
-
end
|
13
|
-
|
14
|
-
# @!visibility private
|
15
|
-
def declare(key)
|
16
|
-
look_for_superclass.map[key] = self
|
17
|
-
end
|
18
|
-
|
19
|
-
# @!visibility private
|
20
|
-
def fetch(key)
|
21
|
-
fail NotFoundError, "%s not found" % key unless klass = map[key.to_sym]
|
22
|
-
klass
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
|
27
|
-
# @!visibility private
|
28
|
-
def look_for_superclass
|
29
|
-
klass = superclass
|
30
|
-
ancestors.find { |ancestor| ancestor == klass }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|