stream_parser 0.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/stream_parser/html/tag.rb +39 -0
- data/lib/stream_parser/html.rb +112 -0
- data/lib/stream_parser/syntax_error.rb +1 -0
- data/lib/stream_parser/version.rb +1 -1
- data/lib/stream_parser.rb +24 -2
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1467f9df4e5b2bcf033a774a97ea24f61635a8249be9a1c7daad54d77251ede6
|
4
|
+
data.tar.gz: dbb178a552f4d194b25a1df946bd10ae546225bc140ce5047695370310562937
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: faadb8fe5f2706851da234ff47e7a6d01f78a13cc06f663d213e31cf179c35773ef30ae483619070822dad456500e66e1b5a22ba98eb4afb5935bdb6f8fd715f
|
7
|
+
data.tar.gz: 56ace2ac66d7ccdbf217d1e21250cc3d91f54320501a7d75232f613e49fb3d8920eddef54eeb6f20797c9cf5e57807256119a4fc92c0f5676624a25ef556cde3
|
data/README.md
CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
|
|
55
55
|
# => SyntaxError "Unbalanced Quotes in string"
|
56
56
|
```
|
57
57
|
|
58
|
+
Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
|
59
|
+
`StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
|
58
60
|
|
59
61
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# A single HTML tag: its name, its attributes, and flags telling whether it
# is a closing tag and/or a self-closing tag.
class StreamParser::HTML::Tag
  attr_reader :name, :attributes
  attr_writer :self_closing

  # name    - the tag name
  # closing - true when this is a closing tag (default: false)
  def initialize(name, closing=false)
    @name = name
    @attributes = {}
    @closing = closing
    @self_closing = false
  end

  # Reads an attribute. The key is normalized with #to_sym, so String and
  # Symbol keys address the same entry.
  def [](key)
    @attributes[key.to_sym]
  end

  # Stores an attribute under the #to_sym form of +key+.
  def []=(key, value)
    @attributes[key.to_sym] = value
  end

  def self_closing?
    @self_closing
  end

  def closing?
    @closing
  end

  def opening?
    !@closing
  end

  # Tests this tag against the given criteria; criteria left as nil are
  # not checked.
  #
  # name       - expected tag name (compared with ==)
  # closing    - expected closing flag; nil means "either"
  # attributes - Hash of attribute => expected value; every pair must match
  #
  # Returns true when all supplied criteria hold, false otherwise.
  def match(name: nil, closing: nil, attributes: nil)
    return false if name && @name != name
    return false if !closing.nil? && @closing != closing
    # Go through #[] so keys get the same #to_sym normalization used when the
    # attributes were stored; a raw lookup would never find String keys.
    return false if attributes && !attributes.all? { |k, v| self[k] == v }

    true
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# HTML scanning helpers layered on top of the StreamParser primitives
# (scan_until, gobble, peek, forward, quoted_value, ...).
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  # Including StreamParser::HTML also includes StreamParser into +base+.
  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag, skipping over <!-- ... --> comments.
  #
  # old_index - position handed on to scan_tag, which stores it in @old_index;
  #             defaults to the position at the time of the call
  #
  # Returns the Tag built by scan_tag, or nil when no further "<" is found.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    start_index = @index-1

    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      # No "<" after the comment means there is no next tag; carrying on
      # would build a bogus Tag out of whatever text trails the comment.
      return unless scan_until(/<\s*/)
    end

    # HTMLComment.new(pre_match)
    closing = peek(1) == '/'
    scan_until(/[^>\s\/]+/)
    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Scans forward until a tag satisfies Tag#match for the given criteria.
  #
  # name       - tag name to look for
  # closing    - passed through to Tag#match (nil matches either kind)
  # attributes - attribute values the tag must carry
  #
  # Returns the matching Tag, or nil when the tags run out first.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag
    while tag && !tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Scans forward to the first closing tag seen while no opening tag found
  # during this scan is still pending. Opening tags are pushed, closing tags
  # pop, and self-closing tags are ignored.
  #
  # Returns that closing Tag, or nil when the tags run out first.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      if !tag.closing? && !tag.self_closing?
        open_tags << tag
      elsif !tag.self_closing?
        open_tags.pop
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Builds a Tag named +name+ by reading attributes up to the end of the tag.
  #
  # name        - tag name
  # closing     - whether the tag is a closing tag
  # old_index   - value stored in @old_index once the tag has been read
  # start_index - start of the tag text; @match is set to the source from
  #               here up to the position reached
  #
  # Returns the Tag. Reaching the end of the source before ">" still returns
  # the Tag read so far.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    while !eos?
      gobble(/\s+/)
      key = case peek(1)
      when '>'
        forward(1)
        @old_index = old_index
        @match = @source[start_index...@index]
        return tag
      when '/'
        # "/" followed by optional whitespace and ">" ends a self-closing tag.
        forward(1)
        gobble(/\s*\>/)
        @old_index = old_index
        @match = @source[start_index...@index]
        tag.self_closing = true
        return tag
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        scan_until(/[^>\s\/=]+/)[0]
      end

      # An attribute without "=value" is stored as true.
      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        true
      end
    end

    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads an attribute value at the current position: a quoted string (via
  # quoted_value) or a bare run of characters other than ">", whitespace,
  # "/" and "=".
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      scan_until(/[^>\s\/=]+/)[0]
    end
  end

  # Scans forward to the next closing tag for +name+ (e.g. "</li>").
  #
  # name - tag name; converted with #to_s and matched literally
  #
  # Returns the result of scan_until for that closing tag.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Error raised by StreamParser for malformed input, e.g. unbalanced quotes.
class StreamParser::SyntaxError < StandardError
end
|
data/lib/stream_parser.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module StreamParser
|
2
2
|
|
3
|
+
autoload :HTML, 'stream_parser/html'
|
4
|
+
autoload :SyntaxError, 'stream_parser/syntax_error'
|
5
|
+
|
3
6
|
# Hook run when this module is included: extends the including +base+
# with ClassMethods.
def self.included(base)
  base.extend(ClassMethods)
end
|
@@ -29,8 +32,8 @@ module StreamParser
|
|
29
32
|
|
30
33
|
def scan_until(r)
|
31
34
|
r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
|
32
|
-
index = @source.index(r, @index)
|
33
35
|
match = @source.match(r, @index)
|
36
|
+
index = match&.begin(0)
|
34
37
|
|
35
38
|
if match
|
36
39
|
@match = match.to_s
|
@@ -43,6 +46,13 @@ module StreamParser
|
|
43
46
|
end
|
44
47
|
match
|
45
48
|
end
|
49
|
+
|
50
|
+
# Consumes +r+ only when it matches immediately at the current position.
#
# r - a Regexp, or a String that is matched literally (the same convention
#     scan_until applies to String arguments)
#
# Returns whatever scan_until returns for the match, or nil when +r+ does not
# match at the current position (nothing is consumed in that case).
def gobble(r)
  # Normalize up front: String#match would compile a String as a pattern
  # while scan_until escapes it, so the look-ahead and the actual scan could
  # disagree (e.g. "." passing the check on any character and then skipping
  # ahead to a literal dot).
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
  found = @source.match(r, @index)
  return unless found&.begin(0) == @index

  scan_until(r)
end
|
46
56
|
|
47
57
|
def pre_match
|
48
58
|
@source[@old_index...(@index-(@match&.size || 0))]
|
@@ -70,6 +80,18 @@ module StreamParser
|
|
70
80
|
@source[@index-1]
|
71
81
|
end
|
72
82
|
|
83
|
+
# Tells whether +r+ matches immediately at the current position, without
# consuming anything.
#
# r - a Regexp, or a String that is matched literally (the same convention
#     scan_until applies to String arguments)
#
# Returns true or false.
def next?(r)
  # String#match would compile a String as a pattern; escape it so Strings
  # are treated as literals here too.
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
  @source.match(r, @index)&.begin(0) == @index
end
|
86
|
+
|
87
|
+
# Looks ahead from the current position without consuming anything.
#
# n - number of characters to return (default 1), or a Regexp
#
# Returns the result of @source.slice(@index, n) for a count. For a Regexp,
# returns the MatchData of the first match at or after the current position
# (the match is not anchored to the cursor), or nil when there is none.
def peek(n=1)
  return @source.match(n, @index) if n.is_a?(Regexp)

  @source.slice(@index, n)
end
|
94
|
+
|
73
95
|
def next_word
|
74
96
|
nw = @source.match(/\s*(\S+)/, @index)
|
75
97
|
nw.nil? ? nil : nw[1]
|
@@ -107,7 +129,7 @@ module StreamParser
|
|
107
129
|
ret_value = ""
|
108
130
|
while scan_until(/(#{quote_char}|\Z)/)
|
109
131
|
if match != quote_char
|
110
|
-
raise
|
132
|
+
raise StreamParser::SyntaxError.new("Unbalanced quotes #{quote_char}")
|
111
133
|
elsif !escape_chars.include?(pre_match[-1])
|
112
134
|
ret_value << pre_match
|
113
135
|
return ret_value
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.4'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jon Bracy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -118,6 +118,9 @@ extra_rdoc_files:
|
|
118
118
|
files:
|
119
119
|
- README.md
|
120
120
|
- lib/stream_parser.rb
|
121
|
+
- lib/stream_parser/html.rb
|
122
|
+
- lib/stream_parser/html/tag.rb
|
123
|
+
- lib/stream_parser/syntax_error.rb
|
121
124
|
- lib/stream_parser/version.rb
|
122
125
|
homepage: https://github.com/malomalo/stream_parser
|
123
126
|
licenses:
|
@@ -140,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
143
|
- !ruby/object:Gem::Version
|
141
144
|
version: '0'
|
142
145
|
requirements: []
|
143
|
-
rubygems_version: 3.
|
146
|
+
rubygems_version: 3.5.4
|
144
147
|
signing_key:
|
145
148
|
specification_version: 4
|
146
149
|
summary: SAX/Stream style parse helpers
|