stream_parser 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/stream_parser/html/tag.rb +39 -0
- data/lib/stream_parser/html.rb +112 -0
- data/lib/stream_parser/version.rb +1 -1
- data/lib/stream_parser.rb +22 -1
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3cc1101615985f944d1a8f88cd37509d985860be8950fed026bd75399184ce2d
|
|
4
|
+
data.tar.gz: c5088acc9378bb08a988a4d53c6fc3d9ec0d92ddfb20a4484a8c5b83e5ce6031
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8519f3bbcd0dc82e16fb52d366f893fc2465720ac626aa7822604a294c55baed11567bf62ae4b2f6971de0df8f67c2b5722413f1fdd8a86466b894810ef105ec
|
|
7
|
+
data.tar.gz: 7c5888f18e4dd6f65d86d117826294bfa4d3aac80a9c14c529764ac93243beb06fbef751107374818b5497fd9e571cfeab5f585c4012cf6763b440d5d23c8885
|
data/README.md
CHANGED
|
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
|
|
|
55
55
|
# => SyntaxError "Unbalanced Quotes in string"
|
|
56
56
|
```
|
|
57
57
|
|
|
58
|
+
Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
|
|
59
|
+
`StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
|
|
58
60
|
|
|
59
61
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Value object describing one HTML tag read from the source.
#
# name       - the tag name handed to the constructor.
# attributes - Hash of attribute values; keys are stored as symbols because
#              every write through #[]= converts the key with +to_sym+.
class StreamParser::HTML::Tag
  attr_reader :name, :attributes
  attr_writer :self_closing

  # name    - the tag name.
  # closing - true when the tag is an end tag (defaults to false).
  #
  # A new tag starts with no attributes and is not self-closing.
  def initialize(name, closing=false)
    @name = name
    @attributes = {}
    @closing = closing
    @self_closing = false
  end

  # Returns the attribute stored under +key+ (converted with +to_sym+),
  # or nil when the attribute was never set.
  def [](key)
    @attributes[key.to_sym]
  end

  # Stores +value+ under +key+ (converted with +to_sym+).
  def []=(key, value)
    @attributes[key.to_sym] = value
  end

  # Returns the flag set through +self_closing=+ (false until it is set).
  def self_closing?
    @self_closing
  end

  # Returns the +closing+ value given to the constructor.
  def closing?
    @closing
  end

  # Returns the negation of #closing?.
  def opening?
    !@closing
  end

  # Checks this tag against optional criteria; criteria left as nil are ignored.
  #
  # name       - when given, must equal the tag's name.
  # closing    - when not nil, must equal the tag's closing flag.
  # attributes - when given, every key/value pair must equal the stored
  #              attribute value.
  #
  # Returns true when every supplied criterion holds, false otherwise.
  def match(name: nil, closing: nil, attributes: nil)
    return false if name && @name != name
    return false if !closing.nil? && @closing != closing
    # Look attributes up through #[] so String keys are normalised to symbols
    # the same way they are when written through #[]=.
    return false if attributes && !attributes.all? { |k, v| self[k] == v }

    true
  end
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Mixin that adds HTML tag scanning helpers on top of StreamParser.
#
# Including this module also includes StreamParser into the host (see
# +included+). The methods read and write the cursor state @source, @index,
# @old_index and @match, and call helpers that are not defined in this file
# (scan_until, gobble, peek, forward, next?, next_char, quoted_value, eos?,
# match) -- presumably supplied by StreamParser; verify against that module.
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  # Hook run when the module is included: pulls StreamParser into +base+ too.
  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag, skipping "<!-- ... -->" comments.
  #
  # old_index - value stored in @old_index once the tag has been read;
  #             defaults to the current @index.
  #
  # Returns the Tag built by scan_tag, or nil when no "<" is found.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    # NOTE(review): assumes scan_until leaves @index just past the match, so
    # this points at "<" only when no whitespace followed it -- confirm.
    start_index = @index-1

    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      scan_until(/<\s*/)
    end

    # HTMLComment.new(pre_match)
    closing = peek(1) == '/'
    # The character class excludes "/", so for an end tag this scan skips the
    # slash and lands on the tag name.
    scan_until(/[^>\s\/]+/)
    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Reads tags until one satisfies Tag#match for the given criteria.
  #
  # name       - tag name to look for.
  # closing    - nil to ignore, otherwise the required closing flag.
  # attributes - remaining keywords are attribute values the tag must carry.
  #
  # Returns the matching Tag, or nil when the tags run out.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag
    while tag && !tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Reads tags until it reaches a closing tag that is not paired with an
  # opening tag seen during this scan. Self-closing tags are ignored.
  #
  # Returns that closing Tag, or nil when the tags run out first.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      unless tag.self_closing?
        if tag.closing?
          open_tags.pop
        else
          open_tags << tag
        end
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Builds a Tag called +name+ and reads its attributes up to the end of the tag.
  #
  # name        - tag name passed to Tag.new.
  # closing     - closing flag passed to Tag.new.
  # old_index   - value assigned to @old_index when the tag is finished.
  # start_index - start of the slice of @source stored in @match.
  #
  # Attributes followed by "=" get the value read by html_tag_value; bare
  # attributes are stored as true.
  #
  # Returns the Tag. It is also returned, as read so far, when the input runs
  # out before ">" is seen.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    while !eos?
      gobble(/\s+/)
      key = case peek(1)
      when '>'
        forward(1)
        @old_index = old_index
        @match = @source[start_index...@index]
        return tag
      when '/'
        forward(1)
        gobble(/\s*\>/)
        @old_index = old_index
        @match = @source[start_index...@index]
        tag.self_closing = true
        return tag
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        found = scan_until(/[^>\s\/=]+/)
        # No attribute name is left (e.g. the input ends inside the tag):
        # stop here rather than calling [] on nil.
        break unless found
        found[0]
      end

      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        true
      end
    end

    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads an attribute value at the cursor after skipping leading whitespace.
  #
  # Returns the result of quoted_value for a quoted value, the matched text
  # for an unquoted one, or nil when no value text remains.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      found = scan_until(/[^>\s\/=]+/)
      found && found[0]
    end
  end

  # Scans to the next end tag for +name+ (for example "</li>").
  #
  # Returns whatever scan_until returns for that pattern.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

end
|
data/lib/stream_parser.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
module StreamParser
|
|
2
2
|
|
|
3
|
+
autoload :HTML, File.expand_path('../stream_parser/html', __FILE__)
|
|
4
|
+
|
|
3
5
|
def self.included(base)
|
|
4
6
|
base.extend ClassMethods
|
|
5
7
|
end
|
|
@@ -29,8 +31,8 @@ module StreamParser
|
|
|
29
31
|
|
|
30
32
|
def scan_until(r)
|
|
31
33
|
r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
|
|
32
|
-
index = @source.index(r, @index)
|
|
33
34
|
match = @source.match(r, @index)
|
|
35
|
+
index = match&.begin(0)
|
|
34
36
|
|
|
35
37
|
if match
|
|
36
38
|
@match = match.to_s
|
|
@@ -43,6 +45,13 @@ module StreamParser
|
|
|
43
45
|
end
|
|
44
46
|
match
|
|
45
47
|
end
|
|
48
|
+
|
|
49
|
+
# Consumes +r+ only when it matches exactly at the current @index.
#
# Returns the result of scan_until(r) in that case, otherwise nil.
def gobble(r)
  leading = @source.match(r, @index)
  return unless leading && leading.begin(0) == @index

  scan_until(r)
end
|
|
46
55
|
|
|
47
56
|
def pre_match
|
|
48
57
|
@source[@old_index...(@index-(@match&.size || 0))]
|
|
@@ -70,6 +79,18 @@ module StreamParser
|
|
|
70
79
|
@source[@index-1]
|
|
71
80
|
end
|
|
72
81
|
|
|
82
|
+
# Tells whether +r+ matches @source starting exactly at the current @index.
#
# Returns true or false; a match that begins further ahead does not count.
def next?(r)
  found = @source.match(r, @index)
  found ? found.begin(0) == @index : false
end
|
|
85
|
+
|
|
86
|
+
# Looks ahead from the current @index without changing it.
#
# n - a character count (default 1) or a Regexp.
#
# Returns the next +n+ characters of @source for a count, or the result of
# matching the Regexp against @source from @index onwards (nil when nothing
# matches).
def peek(n=1)
  return @source.match(n, @index) if n.is_a?(Regexp)

  @source.slice(@index, n)
end
|
|
93
|
+
|
|
73
94
|
def next_word
|
|
74
95
|
nw = @source.match(/\s*(\S+)/, @index)
|
|
75
96
|
nw.nil? ? nil : nw[1]
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: stream_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '0.2'
|
|
4
|
+
version: '0.3'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jon Bracy
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-08-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -118,6 +118,8 @@ extra_rdoc_files:
|
|
|
118
118
|
files:
|
|
119
119
|
- README.md
|
|
120
120
|
- lib/stream_parser.rb
|
|
121
|
+
- lib/stream_parser/html.rb
|
|
122
|
+
- lib/stream_parser/html/tag.rb
|
|
121
123
|
- lib/stream_parser/version.rb
|
|
122
124
|
homepage: https://github.com/malomalo/stream_parser
|
|
123
125
|
licenses:
|
|
@@ -140,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
140
142
|
- !ruby/object:Gem::Version
|
|
141
143
|
version: '0'
|
|
142
144
|
requirements: []
|
|
143
|
-
rubygems_version: 3.
|
|
145
|
+
rubygems_version: 3.4.13
|
|
144
146
|
signing_key:
|
|
145
147
|
specification_version: 4
|
|
146
148
|
summary: SAX/Stream style parse helpers
|