stream_parser 0.1 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/stream_parser/html/tag.rb +39 -0
- data/lib/stream_parser/html.rb +112 -0
- data/lib/stream_parser/version.rb +1 -1
- data/lib/stream_parser.rb +40 -5
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3cc1101615985f944d1a8f88cd37509d985860be8950fed026bd75399184ce2d
|
4
|
+
data.tar.gz: c5088acc9378bb08a988a4d53c6fc3d9ec0d92ddfb20a4484a8c5b83e5ce6031
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8519f3bbcd0dc82e16fb52d366f893fc2465720ac626aa7822604a294c55baed11567bf62ae4b2f6971de0df8f67c2b5722413f1fdd8a86466b894810ef105ec
|
7
|
+
data.tar.gz: 7c5888f18e4dd6f65d86d117826294bfa4d3aac80a9c14c529764ac93243beb06fbef751107374818b5497fd9e571cfeab5f585c4012cf6763b440d5d23c8885
|
data/README.md
CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
|
|
55
55
|
# => SyntaxError "Unbalanced Quotes in string"
|
56
56
|
```
|
57
57
|
|
58
|
+
Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
|
59
|
+
`StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
|
58
60
|
|
59
61
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# A single HTML tag (opening, closing or self-closing) together with the
# attributes that were read for it.
#
# Attributes written through #[]= are stored under Symbol keys; #[] and
# #match accept either String or Symbol keys.
class StreamParser::HTML::Tag
  attr_reader :name, :attributes
  attr_writer :self_closing

  # name    - the tag name as given by the caller.
  # closing - true when the tag is a closing tag (e.g. `</div>`).
  def initialize(name, closing=false)
    @name = name
    @attributes = {}
    @closing = closing
    @self_closing = false
  end

  # Returns the attribute value stored under +key+ (normalised with to_sym).
  def [](key)
    @attributes[key.to_sym]
  end

  # Stores +value+ under +key+ (normalised with to_sym).
  def []=(key, value)
    @attributes[key.to_sym] = value
  end

  # True once the self_closing writer has been set to a truthy value.
  def self_closing?
    @self_closing
  end

  def closing?
    @closing
  end

  def opening?
    !@closing
  end

  # Returns true when the tag satisfies every criterion that was supplied.
  #
  # name       - when given, must equal the tag name.
  # closing    - when not nil, must equal the tag's closing flag.
  # attributes - when given, every key/value pair must equal the stored
  #              attribute; String and Symbol keys are both accepted.
  def match(name: nil, closing: nil, attributes: nil)
    return false if name && @name != name
    return false if !closing.nil? && @closing != closing
    return false if attributes && !attributes.all? { |k, v| attribute_for(k) == v }

    true
  end

  private

  # Looks up an attribute by the exact key first (so entries added directly
  # to the #attributes hash are still found), then falls back to the Symbol
  # form that #[]= stores under.
  def attribute_for(key)
    return @attributes[key] if @attributes.key?(key)

    key.respond_to?(:to_sym) ? @attributes[key.to_sym] : nil
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# HTML-specific scanning helpers layered on top of StreamParser.
#
# The methods here rely on state and helpers supplied by StreamParser
# (@source, @index, @old_index, @match, scan_until, gobble, peek, next?,
# next_char, eos?, quoted_value, plus `match` and `forward`, which are not
# defined in this file -- presumably a reader for @match and a method that
# advances @index; confirm against StreamParser).
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  # Including this module also includes StreamParser into +base+.
  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag, skipping `<!-- ... -->` comments, and
  # returns it as a Tag. Returns nil when no further tag can be read.
  #
  # old_index - value restored into @old_index once the tag has been read;
  #             defaults to the current @index.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    # The match may include whitespace after `<`, so step back by the whole
    # match length rather than by one character to land on the `<`.
    start_index = @index - match.size

    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      # Input ended after (or inside) a comment: there is no tag to return.
      return unless scan_until(/<\s*/)
    end

    # HTMLComment.new(pre_match)
    closing = peek(1) == '/'
    # A dangling `<` at the end of the input has no name to read.
    return unless scan_until(/[^>\s\/]+/)

    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Returns the next tag for which Tag#match(name:, closing:, attributes:)
  # is true, or nil when the input runs out first.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag(old_index: old_index)
    until tag.nil? || tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Returns the first closing tag that is not paired with an opening tag
  # seen since the scan started, or nil when none is found.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      # Self-closing tags neither open nor close a nesting level.
      unless tag.self_closing?
        if tag.closing?
          open_tags.pop
        else
          open_tags << tag
        end
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Reads attributes up to the end of the tag and returns the Tag.
  #
  # name        - tag name already read by the caller.
  # closing     - whether the tag is a closing tag.
  # old_index   - value stored into @old_index when the tag is complete.
  # start_index - index in @source where the tag text begins; @match is set
  #               to the source from there up to the current @index.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    while !eos?
      gobble(/\s+/)
      # Trailing whitespace in an unterminated tag leaves nothing to read.
      break if eos?

      key = case peek(1)
      when '>'
        forward(1)
        record_tag_match(old_index, start_index)
        return tag
      when '/'
        forward(1)
        gobble(/\s*\>/)
        record_tag_match(old_index, start_index)
        tag.self_closing = true
        return tag
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        found = scan_until(/[^>\s\/=]+/)
        # Nothing name-like remains (e.g. only `=` before end of input).
        break unless found
        found[0]
      end

      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        # An attribute without `=value` is recorded as true.
        true
      end
    end

    record_tag_match(old_index, start_index)
    tag
  end

  # Reads an attribute value at the current position: a quoted string (via
  # quoted_value) or an unquoted run of characters. Returns nil when no
  # unquoted value can be found.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      found = scan_until(/[^>\s\/=]+/)
      found && found[0]
    end
  end

  # Scans to the next closing tag called +name+ (e.g. `</li>`) and returns
  # whatever scan_until returns for it.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

  private

  # Restores @old_index and sets @match to the source text of the tag that
  # has just been read.
  def record_tag_match(old_index, start_index)
    @old_index = old_index
    @match = @source[start_index...@index]
  end

end
|
data/lib/stream_parser.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module StreamParser
|
2
2
|
|
3
|
+
autoload :HTML, File.expand_path('../stream_parser/html', __FILE__)
|
4
|
+
|
3
5
|
def self.included(base)
|
4
6
|
base.extend ClassMethods
|
5
7
|
end
|
@@ -24,13 +26,13 @@ module StreamParser
|
|
24
26
|
# end
|
25
27
|
|
26
28
|
def eos?
|
27
|
-
@index >= (@source.size -
|
29
|
+
@index >= (@source.size - 0)
|
28
30
|
end
|
29
31
|
|
30
32
|
def scan_until(r)
|
31
33
|
r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
|
32
|
-
index = @source.index(r, @index)
|
33
34
|
match = @source.match(r, @index)
|
35
|
+
index = match&.begin(0)
|
34
36
|
|
35
37
|
if match
|
36
38
|
@match = match.to_s
|
@@ -39,13 +41,20 @@ module StreamParser
|
|
39
41
|
else
|
40
42
|
@match = nil
|
41
43
|
@old_index = @index
|
42
|
-
@index = @source.size
|
44
|
+
@index = @source.size
|
43
45
|
end
|
44
46
|
match
|
45
47
|
end
|
48
|
+
|
49
|
+
# Consumes +r+ only when it matches exactly at the current @index.
# Returns whatever scan_until returns in that case, otherwise nil
# (leaving scan_until uncalled).
def gobble(r)
  hit = @source.match(r, @index)
  scan_until(r) if hit&.begin(0) == @index
end
|
46
55
|
|
47
56
|
def pre_match
|
48
|
-
@source[@old_index...(@index
|
57
|
+
@source[@old_index...(@index-(@match&.size || 0))]
|
49
58
|
end
|
50
59
|
|
51
60
|
def rewind(by=1)
|
@@ -63,13 +72,25 @@ module StreamParser
|
|
63
72
|
end
|
64
73
|
|
65
74
|
def next_char
|
66
|
-
@source[@index
|
75
|
+
@source[@index]
|
67
76
|
end
|
68
77
|
|
69
78
|
def prev_char
|
70
79
|
@source[@index-1]
|
71
80
|
end
|
72
81
|
|
82
|
+
# True when +r+ matches @source starting exactly at the current @index.
def next?(r)
  found = @source.match(r, @index)
  !found.nil? && found.begin(0) == @index
end
|
85
|
+
|
86
|
+
# Looks ahead from the current @index without moving it.
#
# n - a Regexp (returns the result of matching @source from @index) or a
#     length (returns that slice of @source starting at @index).
def peek(n=1)
  return @source.match(n, @index) if n.is_a?(Regexp)

  @source.slice(@index, n)
end
|
93
|
+
|
73
94
|
def next_word
|
74
95
|
nw = @source.match(/\s*(\S+)/, @index)
|
75
96
|
nw.nil? ? nil : nw[1]
|
@@ -103,4 +124,18 @@ module StreamParser
|
|
103
124
|
output
|
104
125
|
end
|
105
126
|
|
127
|
+
# Reads up to the closing +quote_char+ and returns the text before it,
# with escaped quote characters unescaped. Expects the opening quote to
# have been consumed already. Relies on scan_until, match and pre_match
# from the including object.
#
# quote_char   - the closing delimiter; treated literally, so regex
#                metacharacters such as `|` are safe.
# escape_chars - characters that, directly before a quote, mark that quote
#                as part of the value instead of its end.
#
# Raises Net::HTTPHeaderSyntaxError when that class is loaded (the class
# this method has always named), otherwise SyntaxError, when the input ends
# before the closing quote. The fallback avoids a NameError in programs
# that never load net/http.
def quoted_value(quote_char = '"', escape_chars = ["\\"])
  ret_value = +""
  # Escape the delimiter so it cannot change the meaning of the pattern.
  terminator = /(?:#{Regexp.escape(quote_char)}|\Z)/

  while scan_until(terminator)
    if match != quote_char
      # Matched the end of input rather than a quote.
      error_class = defined?(::Net::HTTPHeaderSyntaxError) ? ::Net::HTTPHeaderSyntaxError : ::SyntaxError
      raise error_class, "Unbalanced quotes in string (#{quote_char})"
    elsif !escape_chars.include?(pre_match[-1])
      ret_value << pre_match
      return ret_value
    else
      # Escaped quote: drop the escape character, keep the quote, go on.
      ret_value << pre_match[0...-1] << match
    end
  end
end
|
140
|
+
|
106
141
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jon Bracy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: activesupport
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description:
|
98
112
|
email:
|
99
113
|
- jonbracy@gmail.com
|
@@ -104,6 +118,8 @@ extra_rdoc_files:
|
|
104
118
|
files:
|
105
119
|
- README.md
|
106
120
|
- lib/stream_parser.rb
|
121
|
+
- lib/stream_parser/html.rb
|
122
|
+
- lib/stream_parser/html/tag.rb
|
107
123
|
- lib/stream_parser/version.rb
|
108
124
|
homepage: https://github.com/malomalo/stream_parser
|
109
125
|
licenses:
|
@@ -126,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
142
|
- !ruby/object:Gem::Version
|
127
143
|
version: '0'
|
128
144
|
requirements: []
|
129
|
-
rubygems_version: 3.
|
145
|
+
rubygems_version: 3.4.13
|
130
146
|
signing_key:
|
131
147
|
specification_version: 4
|
132
148
|
summary: SAX/Stream style parse helpers
|