stream_parser 0.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/stream_parser/html/tag.rb +39 -0
- data/lib/stream_parser/html.rb +112 -0
- data/lib/stream_parser/syntax_error.rb +1 -0
- data/lib/stream_parser/version.rb +1 -1
- data/lib/stream_parser.rb +24 -2
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1467f9df4e5b2bcf033a774a97ea24f61635a8249be9a1c7daad54d77251ede6
|
4
|
+
data.tar.gz: dbb178a552f4d194b25a1df946bd10ae546225bc140ce5047695370310562937
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: faadb8fe5f2706851da234ff47e7a6d01f78a13cc06f663d213e31cf179c35773ef30ae483619070822dad456500e66e1b5a22ba98eb4afb5935bdb6f8fd715f
|
7
|
+
data.tar.gz: 56ace2ac66d7ccdbf217d1e21250cc3d91f54320501a7d75232f613e49fb3d8920eddef54eeb6f20797c9cf5e57807256119a4fc92c0f5676624a25ef556cde3
|
data/README.md
CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
|
|
55
55
|
# => SyntaxError "Unbalanced Quotes in string"
|
56
56
|
```
|
57
57
|
|
58
|
+
Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
|
59
|
+
`StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
|
58
60
|
|
59
61
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Value object describing one HTML tag: its name, its attributes, and whether
# it is a closing and/or self-closing tag.
class StreamParser::HTML::Tag
  attr_reader :name, :attributes
  attr_writer :self_closing

  # name    - the tag name.
  # closing - whether this is a closing tag (defaults to false).
  def initialize(name, closing=false)
    @name = name
    @attributes = {}
    @closing = closing
    @self_closing = false
  end

  # Reads an attribute. The key is converted with to_sym before the lookup.
  def [](key)
    @attributes[key.to_sym]
  end

  # Stores an attribute. The key is converted with to_sym before storing.
  def []=(key, value)
    @attributes[key.to_sym] = value
  end

  # Whether the tag was flagged as self closing through self_closing=.
  def self_closing?
    @self_closing
  end

  # Whether the tag was constructed as a closing tag.
  def closing?
    @closing
  end

  # Inverse of closing?.
  def opening?
    !@closing
  end

  # Checks the tag against optional filters; a filter left as nil is ignored.
  #
  # name       - required tag name.
  # closing    - required value of the closing flag (true or false).
  # attributes - Hash of attribute values that must all be equal on the tag.
  #
  # Returns true when every supplied filter matches, false otherwise.
  def match(name: nil, closing: nil, attributes: nil)
    return false if name && @name != name
    return false if !closing.nil? && @closing != closing
    # Go through #[] so filter keys are symbolized the same way stored keys
    # are; a raw lookup would never match a String key.
    return false if attributes && !attributes.all? { |k, v| self[k] == v }

    true
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# HTML oriented helpers built on top of StreamParser. Including this module
# also includes StreamParser into the including class.
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag, skipping over `<!-- ... -->` comments, and
  # parses it.
  #
  # old_index - value stored in @old_index once the tag has been parsed;
  #             defaults to the index at the time of the call.
  #
  # Returns a Tag, or nil when no further "<" is found.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    # The pattern may also consume whitespace after "<", so step back by the
    # whole match to land on the "<" itself.
    start_index = @index - match.size

    # Comments are skipped, not returned.
    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      # No tag after the comment: report "no tag" instead of building one
      # from a stale match.
      return unless scan_until(/<\s*/)
    end

    closing = peek(1) == '/'
    scan_until(/[^>\s\/]+/)
    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Scans forward until a tag matching the given filters is found.
  #
  # name       - tag name to look for.
  # closing    - when true/false, only closing/non-closing tags match; nil
  #              accepts both.
  # attributes - attribute values the tag must carry.
  #
  # Returns the matching Tag, or nil when next_tag runs out of tags.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag(old_index: old_index)
    while tag && !tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Scans forward to the first closing tag that is not paired with a tag
  # opened during this scan. Self closing tags are ignored.
  #
  # Returns that closing Tag, or nil when next_tag runs out of tags.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      unless tag.self_closing?
        tag.closing? ? open_tags.pop : open_tags.push(tag)
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Parses the remainder of a tag whose name has already been scanned: reads
  # attributes until the terminating ">" (or "/>") and builds the Tag.
  # On return @old_index is set to old_index and @match to the source text
  # from start_index up to the current index.
  #
  # name        - the tag name.
  # closing     - whether the tag is a closing tag.
  # old_index   - value to store in @old_index.
  # start_index - index in @source where the tag text starts.
  #
  # Returns the Tag.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    while !eos?
      gobble(/\s+/)
      # Only whitespace was left; there is no attribute to read.
      break if eos?

      key = case peek(1)
      when '>'
        forward(1)
        @old_index = old_index
        @match = @source[start_index...@index]
        return tag
      when '/'
        forward(1)
        gobble(/\s*\>/)
        @old_index = old_index
        @match = @source[start_index...@index]
        tag.self_closing = true
        return tag
      when '"', "'"
        # Quoted attribute name; read with the same helpers as quoted values.
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        found = scan_until(/[^>\s\/=]+/)
        # Nothing but separator characters remain in the input.
        break unless found
        found[0]
      end

      # An attribute without "=value" is stored as true.
      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        true
      end
    end

    # Input ended before the tag was terminated; return what was collected.
    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads an attribute value at the current index: a quoted string when the
  # next character is a quote, otherwise a run of characters other than ">",
  # whitespace, "/" and "=".
  #
  # Returns the value, or nil when no unquoted value can be found.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      scan_until(/[^>\s\/=]+/)&.[](0)
    end
  end

  # Scans forward to the next closing tag with the given name
  # (`</name>`, whitespace allowed after the slash).
  #
  # Returns the result of scan_until.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
class StreamParser::SyntaxError < StandardError; end
|
data/lib/stream_parser.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module StreamParser
|
2
2
|
|
3
|
+
autoload :HTML, 'stream_parser/html'
|
4
|
+
autoload :SyntaxError, 'stream_parser/syntax_error'
|
5
|
+
|
3
6
|
# Hook run when the module is included: extends the including class with
# ClassMethods.
def self.included(base)
  base.extend(ClassMethods)
end
|
@@ -29,8 +32,8 @@ module StreamParser
|
|
29
32
|
|
30
33
|
def scan_until(r)
|
31
34
|
r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
|
32
|
-
index = @source.index(r, @index)
|
33
35
|
match = @source.match(r, @index)
|
36
|
+
index = match&.begin(0)
|
34
37
|
|
35
38
|
if match
|
36
39
|
@match = match.to_s
|
@@ -43,6 +46,13 @@ module StreamParser
|
|
43
46
|
end
|
44
47
|
match
|
45
48
|
end
|
49
|
+
|
50
|
+
# Consumes the pattern only when it matches exactly at the current index.
#
# r - a Regexp, or a String that is matched literally (escaped the same way
#     scan_until escapes String patterns).
#
# Returns the result of scan_until when the pattern was consumed, nil
# otherwise.
def gobble(r)
  # Normalize once so the positional check and the scan use the same pattern.
  pattern = r.is_a?(String) ? Regexp.new(Regexp.escape(r)) : r
  scan_until(pattern) if @source.match(pattern, @index)&.begin(0) == @index
end
|
46
56
|
|
47
57
|
def pre_match
|
48
58
|
@source[@old_index...(@index-(@match&.size || 0))]
|
@@ -70,6 +80,18 @@ module StreamParser
|
|
70
80
|
@source[@index-1]
|
71
81
|
end
|
72
82
|
|
83
|
+
# Tells whether the pattern matches exactly at the current index, without
# consuming anything.
#
# r - a Regexp, or a String that is matched literally (escaped the same way
#     scan_until escapes String patterns).
#
# Returns true or false.
def next?(r)
  pattern = r.is_a?(String) ? Regexp.new(Regexp.escape(r)) : r
  @source.match(pattern, @index)&.begin(0) == @index
end
|
86
|
+
|
87
|
+
# Looks ahead from the current index without moving it.
#
# n - a character count (default 1), or a Regexp.
#
# Returns the next n characters of the source for a count, or the result of
# matching the Regexp against the source starting at the current index.
def peek(n=1)
  return @source.match(n, @index) if n.is_a?(Regexp)

  @source[@index, n]
end
|
94
|
+
|
73
95
|
def next_word
|
74
96
|
nw = @source.match(/\s*(\S+)/, @index)
|
75
97
|
nw.nil? ? nil : nw[1]
|
@@ -107,7 +129,7 @@ module StreamParser
|
|
107
129
|
ret_value = ""
|
108
130
|
while scan_until(/(#{quote_char}|\Z)/)
|
109
131
|
if match != quote_char
|
110
|
-
raise
|
132
|
+
raise StreamParser::SyntaxError.new("Unbalanced quotes #{quote_char}")
|
111
133
|
elsif !escape_chars.include?(pre_match[-1])
|
112
134
|
ret_value << pre_match
|
113
135
|
return ret_value
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.4'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jon Bracy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -118,6 +118,9 @@ extra_rdoc_files:
|
|
118
118
|
files:
|
119
119
|
- README.md
|
120
120
|
- lib/stream_parser.rb
|
121
|
+
- lib/stream_parser/html.rb
|
122
|
+
- lib/stream_parser/html/tag.rb
|
123
|
+
- lib/stream_parser/syntax_error.rb
|
121
124
|
- lib/stream_parser/version.rb
|
122
125
|
homepage: https://github.com/malomalo/stream_parser
|
123
126
|
licenses:
|
@@ -140,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
143
|
- !ruby/object:Gem::Version
|
141
144
|
version: '0'
|
142
145
|
requirements: []
|
143
|
-
rubygems_version: 3.
|
146
|
+
rubygems_version: 3.5.4
|
144
147
|
signing_key:
|
145
148
|
specification_version: 4
|
146
149
|
summary: SAX/Stream style parse helpers
|