stream_parser 0.1 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e47f922c40712cf85591d6fe3f5e39b8fd974964a9c8604bc1d91dcaebedd36
4
- data.tar.gz: fed08e64284d9b798822d620d06d7618932ff3ac3546e5033186aba807befc54
3
+ metadata.gz: 3cc1101615985f944d1a8f88cd37509d985860be8950fed026bd75399184ce2d
4
+ data.tar.gz: c5088acc9378bb08a988a4d53c6fc3d9ec0d92ddfb20a4484a8c5b83e5ce6031
5
5
  SHA512:
6
- metadata.gz: d25b559fb053168d5637ad0f774eb291b98ee34cd7baf30858699b1584e533f8322fdcd677685f4647bc101df997251cea1cb843b9d3d5d511fbd234b75e1ce7
7
- data.tar.gz: e2fa502f87ed3285044086780c0e615641edccdb1e3680418a004e247f19dbd389a2f6465068bfb1081b44a2599e159ede05101f5f1e691d1f6f89802d967b4c
6
+ metadata.gz: 8519f3bbcd0dc82e16fb52d366f893fc2465720ac626aa7822604a294c55baed11567bf62ae4b2f6971de0df8f67c2b5722413f1fdd8a86466b894810ef105ec
7
+ data.tar.gz: 7c5888f18e4dd6f65d86d117826294bfa4d3aac80a9c14c529764ac93243beb06fbef751107374818b5497fd9e571cfeab5f585c4012cf6763b440d5d23c8885
data/README.md CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
55
55
  # => SyntaxError "Unbalanced Quotes in string"
56
56
  ```
57
57
 
58
+ Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
59
+ `StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
58
60
 
59
61
 
@@ -0,0 +1,39 @@
1
# A single HTML tag as produced by the HTML scanning helpers: its name, its
# attributes (keyed by Symbol), and whether it is a closing and/or
# self-closing tag.
class StreamParser::HTML::Tag
  attr_writer :self_closing
  attr_reader :name, :attributes

  # name    - the tag name as scanned from the source.
  # closing - true when the tag is an end tag (e.g. "</div>").
  def initialize(name, closing=false)
    @name, @closing = name, closing
    @self_closing = false
    @attributes = {}
  end

  # Attribute lookup; the key is converted with #to_sym first, so Strings
  # and Symbols address the same entry.
  def [](key)
    attributes[key.to_sym]
  end

  # Attribute assignment; the key is stored as a Symbol.
  def []=(key, value)
    attributes[key.to_sym] = value
  end

  # Whatever was last assigned through #self_closing= (false initially).
  def self_closing?
    @self_closing
  end

  # The +closing+ flag given to the constructor.
  def closing?
    @closing
  end

  # Logical negation of the closing flag.
  def opening?
    !@closing
  end

  # Returns true when this tag satisfies every criterion that was supplied.
  #
  # name       - skipped when nil/false, otherwise must equal the tag name.
  # closing    - skipped when nil, otherwise must equal the closing flag.
  # attributes - skipped when nil/false, otherwise every key/value pair must
  #              equal the stored attribute (keys are looked up as given, so
  #              they need to be Symbols to hit stored attributes).
  def match(name: nil, closing: nil, attributes: nil)
    name_ok = !name || @name == name

    name_ok &&
      (closing.nil? || @closing == closing) &&
      (!attributes || attributes.all? { |key, expected| @attributes[key] == expected })
  end
end
@@ -0,0 +1,112 @@
1
# HTML scanning helpers layered on top of StreamParser. Including this module
# also includes StreamParser into the host class.
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag and returns it as a Tag, skipping any
  # "<!-- ... -->" comments encountered on the way.
  #
  # old_index - value to store in @old_index once the tag has been read;
  #             defaults to the index at the time of the call.
  #
  # Returns nil when no further tag can be found. This includes input that
  # ends right after a comment or after a bare "<", which previously
  # produced a Tag with a nil name.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    start_index = @index - 1

    # TODO: comments are discarded for now; an HTMLComment node could be
    # built from pre_match here.
    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      return unless scan_until(/<\s*/)
    end

    closing = peek(1) == '/'
    # The name pattern excludes "/", so for an end tag the leading slash is
    # skipped and only the tag name is matched.
    return unless scan_until(/[^>\s\/]+/)

    scan_tag(match, closing: closing, old_index: old_index, start_index: start_index)
  end

  # Reads tags until one satisfies Tag#match for the given criteria.
  #
  # name       - tag name to look for.
  # closing    - nil to accept either kind, true/false to require one.
  # attributes - attribute values the tag must carry.
  #
  # Returns the matching Tag, or nil when the input runs out first.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag(old_index: old_index)
    while tag && !tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Reads tags until the closing tag that balances the current nesting level
  # is reached, tracking opening tags seen in between.
  #
  # Only tags flagged self_closing? are excluded from the balance.
  # NOTE(review): void elements written without a trailing slash (e.g.
  # "<br>") are therefore counted as open tags -- confirm this is intended.
  #
  # Returns the closing Tag, or nil when the input runs out first.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag(old_index: old_index)
    while tag && !(tag.closing? && open_tags.empty?)
      unless tag.self_closing?
        tag.closing? ? open_tags.pop : open_tags.push(tag)
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Reads the remainder of a tag (attributes and terminator) after its name
  # has been consumed.
  #
  # name        - the tag name already scanned.
  # closing     - whether the tag is an end tag.
  # old_index   - value stored in @old_index when done.
  # start_index - source index of the tag start; @match is set to the source
  #               from there up to the final index.
  #
  # Returns the populated Tag. Input that ends inside the tag yields the
  # attributes read so far instead of raising NoMethodError.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    until eos?
      gobble(/\s+/)
      break if eos? # only trailing whitespace was left

      case peek(1)
      when '>'
        forward(1)
        break
      when '/'
        forward(1)
        gobble(/\s*\>/)
        tag.self_closing = true
        break
      end

      # Attribute names are read exactly like values: quoted or bare.
      key = html_tag_value
      break unless key

      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        true
      end
    end

    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads one attribute token at the current position: a quoted string
  # (either quote style) or a bare run of characters other than ">",
  # whitespace, "/" and "=".
  #
  # Returns the token, or nil when no bare token is left in the input.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      found = scan_until(/[^>\s\/=]+/)
      found && found[0]
    end
  end

  # Scans forward to the next end tag called +name+ (e.g. "</li>").
  # The name was previously ignored and "li" was always used. Whitespace
  # before the closing ">" is tolerated.
  #
  # Returns whatever scan_until returns for the end-tag pattern.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}\s*>/)
  end

end
@@ -1,3 +1,3 @@
1
1
  module StreamParser
2
- VERSION = '0.1'
2
+ VERSION = '0.3'
3
3
  end
data/lib/stream_parser.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module StreamParser
2
2
 
3
+ autoload :HTML, File.expand_path('../stream_parser/html', __FILE__)
4
+
3
5
  def self.included(base)
4
6
  base.extend ClassMethods
5
7
  end
@@ -24,13 +26,13 @@ module StreamParser
24
26
  # end
25
27
 
26
28
  def eos?
27
- @index >= (@source.size - 1)
29
+ @index >= (@source.size - 0)
28
30
  end
29
31
 
30
32
  def scan_until(r)
31
33
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
32
- index = @source.index(r, @index)
33
34
  match = @source.match(r, @index)
35
+ index = match&.begin(0)
34
36
 
35
37
  if match
36
38
  @match = match.to_s
@@ -39,13 +41,20 @@ module StreamParser
39
41
  else
40
42
  @match = nil
41
43
  @old_index = @index
42
- @index = @source.size - 1
44
+ @index = @source.size
43
45
  end
44
46
  match
45
47
  end
48
+
49
# Consumes +r+ only when it matches exactly at the current index.
#
# Returns the result of scan_until(r) in that case, nil otherwise (the
# index is left untouched when nothing is consumed).
def gobble(r)
  found = @source.match(r, @index)
  return nil unless (found && found.begin(0)) == @index

  scan_until(r)
end
46
55
 
47
56
  def pre_match
48
- @source[@old_index...(@index-@match.size)]
57
+ @source[@old_index...(@index-(@match&.size || 0))]
49
58
  end
50
59
 
51
60
  def rewind(by=1)
@@ -63,13 +72,25 @@ module StreamParser
63
72
  end
64
73
 
65
74
  def next_char
66
- @source[@index+1]
75
+ @source[@index]
67
76
  end
68
77
 
69
78
  def prev_char
70
79
  @source[@index-1]
71
80
  end
72
81
 
82
# Whether +r+ matches the source starting exactly at the current index.
# Does not move the index.
def next?(r)
  found = @source.match(r, @index)
  found ? found.begin(0) == @index : false
end
85
+
86
# Looks ahead from the current index without moving it.
#
# n - a character count (default 1), or a Regexp.
#
# Returns the next +n+ characters for a count, or the result of matching
# the Regexp against the source from the current index onwards.
def peek(n=1)
  case n
  when Regexp then @source.match(n, @index)
  else @source.slice(@index, n)
  end
end
93
+
73
94
  def next_word
74
95
  nw = @source.match(/\s*(\S+)/, @index)
75
96
  nw.nil? ? nil : nw[1]
@@ -103,4 +124,18 @@ module StreamParser
103
124
  output
104
125
  end
105
126
 
127
# Reads a quoted value, starting just after the opening quote, up to the
# matching unescaped closing quote, and leaves the index after that quote.
#
# quote_char   - the closing quote to look for. It is escaped before being
#                used in the pattern, so regexp metacharacters such as "*"
#                or "|" are safe to use as quote characters.
# escape_chars - characters that, when directly preceding a quote, make
#                that quote part of the value (the escape character itself
#                is dropped).
#
# Returns the unquoted String.
#
# Raises when the source ends before a closing quote is found. The error is
# Net::HTTPHeaderSyntaxError when that constant is loaded (kept for callers
# that rescue it), and SyntaxError otherwise, instead of failing with a
# NameError on the missing Net constant.
def quoted_value(quote_char = '"', escape_chars = ["\\"])
  ret_value = +""
  terminator = /(#{Regexp.escape(quote_char)}|\Z)/

  while scan_until(terminator)
    if match != quote_char
      # The pattern hit end-of-input rather than a quote.
      error_class = defined?(Net::HTTPHeaderSyntaxError) ? Net::HTTPHeaderSyntaxError : SyntaxError
      raise error_class, "Unbalanced quotes in string (#{quote_char})"
    end

    unless escape_chars.include?(pre_match[-1])
      ret_value << pre_match
      return ret_value
    end

    # Escaped quote: keep the text minus the escape char, then the quote.
    ret_value << pre_match[0...-1] << match
  end
end
140
+
106
141
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Bracy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-06 00:00:00.000000000 Z
11
+ date: 2023-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: activesupport
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  description:
98
112
  email:
99
113
  - jonbracy@gmail.com
@@ -104,6 +118,8 @@ extra_rdoc_files:
104
118
  files:
105
119
  - README.md
106
120
  - lib/stream_parser.rb
121
+ - lib/stream_parser/html.rb
122
+ - lib/stream_parser/html/tag.rb
107
123
  - lib/stream_parser/version.rb
108
124
  homepage: https://github.com/malomalo/stream_parser
109
125
  licenses:
@@ -126,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
142
  - !ruby/object:Gem::Version
127
143
  version: '0'
128
144
  requirements: []
129
- rubygems_version: 3.2.3
145
+ rubygems_version: 3.4.13
130
146
  signing_key:
131
147
  specification_version: 4
132
148
  summary: SAX/Stream style parse helpers