stream_parser 0.1 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e47f922c40712cf85591d6fe3f5e39b8fd974964a9c8604bc1d91dcaebedd36
4
- data.tar.gz: fed08e64284d9b798822d620d06d7618932ff3ac3546e5033186aba807befc54
3
+ metadata.gz: 3cc1101615985f944d1a8f88cd37509d985860be8950fed026bd75399184ce2d
4
+ data.tar.gz: c5088acc9378bb08a988a4d53c6fc3d9ec0d92ddfb20a4484a8c5b83e5ce6031
5
5
  SHA512:
6
- metadata.gz: d25b559fb053168d5637ad0f774eb291b98ee34cd7baf30858699b1584e533f8322fdcd677685f4647bc101df997251cea1cb843b9d3d5d511fbd234b75e1ce7
7
- data.tar.gz: e2fa502f87ed3285044086780c0e615641edccdb1e3680418a004e247f19dbd389a2f6465068bfb1081b44a2599e159ede05101f5f1e691d1f6f89802d967b4c
6
+ metadata.gz: 8519f3bbcd0dc82e16fb52d366f893fc2465720ac626aa7822604a294c55baed11567bf62ae4b2f6971de0df8f67c2b5722413f1fdd8a86466b894810ef105ec
7
+ data.tar.gz: 7c5888f18e4dd6f65d86d117826294bfa4d3aac80a9c14c529764ac93243beb06fbef751107374818b5497fd9e571cfeab5f585c4012cf6763b440d5d23c8885
data/README.md CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
55
55
  # => SyntaxError "Unbalanced Quotes in string"
56
56
  ```
57
57
 
58
+ Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
59
+ `StreamParser::HTML` which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
58
60
 
59
61
 
@@ -0,0 +1,39 @@
1
# A single HTML tag token: its name, whether it was written as a closing
# tag (`</name>`), whether it was self-closing (`<name />`), and its
# attributes. Attribute keys are stored as symbols.
class StreamParser::HTML::Tag
  attr_reader :name, :attributes
  attr_writer :self_closing

  # name    - the tag name
  # closing - true when the tag is a closing tag
  def initialize(name, closing=false)
    @name = name
    @attributes = {}
    @closing = closing
    @self_closing = false
  end

  # Reads an attribute; the key is normalized with `to_sym`.
  def [](key)
    @attributes[key.to_sym]
  end

  # Writes an attribute; the key is normalized with `to_sym`.
  def []=(key, value)
    @attributes[key.to_sym] = value
  end

  def self_closing?
    @self_closing
  end

  def closing?
    @closing
  end

  def opening?
    !@closing
  end

  # Returns true when every criterion that was supplied holds.
  #
  # name       - required tag name (skipped when nil)
  # closing    - required closing state (skipped when nil)
  # attributes - Hash of attribute => expected value (skipped when nil)
  #
  # Attribute keys are normalized the same way `[]` and `[]=` normalize
  # them, so `{ "href" => "/" }` and `{ href: "/" }` behave the same.
  def match(name: nil, closing: nil, attributes: nil)
    return false if name && @name != name
    return false if !closing.nil? && @closing != closing

    if attributes
      return false unless attributes.all? do |key, expected|
        # Keys that cannot be symbolized fall back to a raw lookup.
        lookup = key.respond_to?(:to_sym) ? key.to_sym : key
        @attributes[lookup] == expected
      end
    end

    true
  end
end
@@ -0,0 +1,112 @@
1
# HTML scanning helpers layered on top of StreamParser. Including this
# module also includes StreamParser into the including class.
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  def self.included(base)
    base.include(StreamParser)
  end

  # Scans forward to the next tag, skipping `<!-- ... -->` comments, and
  # returns it as a Tag. Returns nil when no further tag exists.
  #
  # old_index - value restored into @old_index once the tag is scanned
  #             (defaults to the current @index).
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    # The `<\s*` match may be longer than one character, so step back by
    # its real length. This relies on scan_until leaving @index just past
    # the match, the same invariant pre_match depends on.
    start_index = @index - match.size

    while peek(3) == '!--'
      forward(3)
      scan_until(/-->\s*/)
      # Nothing but a comment was left: there is no tag to return.
      return unless scan_until(/<\s*/)
    end

    # NOTE(review): the original carried a commented-out
    # `HTMLComment.new(pre_match)` here; comments are currently dropped.
    closing = peek(1) == '/'
    # A bare `<` at the end of input has no name to scan.
    return unless scan_until(/[^>\s\/]+/)

    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Scans forward until a tag satisfying Tag#match with the given name,
  # closing state and attributes is found. Returns the Tag, or nil when
  # the input is exhausted first.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag
    until tag.nil? || tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Scans forward to the closing tag that balances the tags opened since
  # the current position. Returns that Tag, or nil if none is found.
  #
  # Only tags written with a trailing `/` count as self-closing here, so
  # any other non-closing tag is pushed on the stack of open tags.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      if !tag.closing? && !tag.self_closing?
        open_tags << tag
      elsif !tag.self_closing?
        open_tags.pop
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Scans the remainder of a tag whose name has already been consumed,
  # collecting attributes until `>` or `/>`. On return @old_index is
  # old_index and @match is the source text from start_index up to the
  # current position.
  #
  # name        - tag name
  # closing     - whether this is a closing tag
  # old_index   - value to restore into @old_index
  # start_index - index in @source where the tag text begins
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    until eos?
      gobble(/\s+/)
      key = case peek(1)
      when '>'
        forward(1)
        @old_index = old_index
        @match = @source[start_index...@index]
        return tag
      when '/'
        forward(1)
        gobble(/\s*\>/)
        @old_index = old_index
        @match = @source[start_index...@index]
        tag.self_closing = true
        return tag
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        found = scan_until(/[^>\s\/=]+/)
        found && found[0]
      end

      # Input ended inside the tag (e.g. "<a "): stop instead of crashing.
      break if key.nil?

      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        # Attribute without a value, e.g. `<input disabled>`.
        true
      end
    end

    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads an attribute value at the current position: a quoted string or
  # a bare run of characters. Returns nil when the input ends first.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      found = scan_until(/[^>\s\/=]+/)
      found && found[0]
    end
  end

  # Scans forward to the next closing tag for `name` (e.g. `</li>`) and
  # returns whatever scan_until returns for that pattern.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

end
@@ -1,3 +1,3 @@
1
1
  module StreamParser
2
- VERSION = '0.1'
2
+ VERSION = '0.3'
3
3
  end
data/lib/stream_parser.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module StreamParser
2
2
 
3
+ autoload :HTML, File.expand_path('../stream_parser/html', __FILE__)
4
+
3
5
  def self.included(base)
4
6
  base.extend ClassMethods
5
7
  end
@@ -24,13 +26,13 @@ module StreamParser
24
26
  # end
25
27
 
26
28
  def eos?
27
- @index >= (@source.size - 1)
29
+ @index >= (@source.size - 0)
28
30
  end
29
31
 
30
32
  def scan_until(r)
31
33
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
32
- index = @source.index(r, @index)
33
34
  match = @source.match(r, @index)
35
+ index = match&.begin(0)
34
36
 
35
37
  if match
36
38
  @match = match.to_s
@@ -39,13 +41,20 @@ module StreamParser
39
41
  else
40
42
  @match = nil
41
43
  @old_index = @index
42
- @index = @source.size - 1
44
+ @index = @source.size
43
45
  end
44
46
  match
45
47
  end
48
+
49
# Consumes `r` only when it matches exactly at the current position.
# Returns the result of scan_until(r) in that case and nil otherwise,
# leaving the position untouched.
#
# r - Regexp, or a String that is matched literally (escaped the same
#     way scan_until escapes String patterns).
def gobble(r)
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
  found = @source.match(r, @index)
  scan_until(r) if found && found.begin(0) == @index
end
46
55
 
47
56
  def pre_match
48
- @source[@old_index...(@index-@match.size)]
57
+ @source[@old_index...(@index-(@match&.size || 0))]
49
58
  end
50
59
 
51
60
  def rewind(by=1)
@@ -63,13 +72,25 @@ module StreamParser
63
72
  end
64
73
 
65
74
  def next_char
66
- @source[@index+1]
75
+ @source[@index]
67
76
  end
68
77
 
69
78
  def prev_char
70
79
  @source[@index-1]
71
80
  end
72
81
 
82
# Whether `r` matches starting exactly at the current position.
def next?(r)
  found = @source.match(r, @index)
  found ? found.begin(0) == @index : false
end
85
+
86
# Looks ahead without moving the position.
#
# n - an Integer number of characters to return as a String, or a Regexp
#     to match against the source from the current position (returns the
#     MatchData or nil).
def peek(n=1)
  case n
  when Regexp then @source.match(n, @index)
  else @source.slice(@index, n)
  end
end
93
+
73
94
  def next_word
74
95
  nw = @source.match(/\s*(\S+)/, @index)
75
96
  nw.nil? ? nil : nw[1]
@@ -103,4 +124,18 @@ module StreamParser
103
124
  output
104
125
  end
105
126
 
127
# Reads the remainder of a quoted value whose opening quote has already
# been consumed, and returns the unescaped contents. The position ends
# up just past the closing quote.
#
# quote_char   - the closing quote character to look for
# escape_chars - characters that, directly before a quote, mark it as
#                escaped; the escape character is dropped and the quote
#                kept in the result
#
# Raises Net::HTTPHeaderSyntaxError when the source ends before an
# unescaped closing quote is found.
def quoted_value(quote_char = '"', escape_chars = ["\\"])
  # Escape the quote so characters such as `|` or `.` are taken literally.
  terminator = /(#{Regexp.escape(quote_char)}|\Z)/
  ret_value = +""

  while scan_until(terminator)
    if match != quote_char
      # Reached end of input (\Z) without a closing quote. The exception
      # class is kept for callers; load it lazily in case net/http has
      # not been required.
      require 'net/http' unless defined?(Net::HTTPHeaderSyntaxError)
      raise Net::HTTPHeaderSyntaxError, "Unbalanced quotes in string (missing closing #{quote_char})"
    elsif !escape_chars.include?(pre_match[-1])
      ret_value << pre_match
      return ret_value
    else
      # Escaped quote: drop the escape character, keep the quote, go on.
      ret_value << pre_match[0...-1] << match
    end
  end
end
140
+
106
141
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Bracy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-06 00:00:00.000000000 Z
11
+ date: 2023-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: activesupport
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  description:
98
112
  email:
99
113
  - jonbracy@gmail.com
@@ -104,6 +118,8 @@ extra_rdoc_files:
104
118
  files:
105
119
  - README.md
106
120
  - lib/stream_parser.rb
121
+ - lib/stream_parser/html.rb
122
+ - lib/stream_parser/html/tag.rb
107
123
  - lib/stream_parser/version.rb
108
124
  homepage: https://github.com/malomalo/stream_parser
109
125
  licenses:
@@ -126,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
142
  - !ruby/object:Gem::Version
127
143
  version: '0'
128
144
  requirements: []
129
- rubygems_version: 3.2.3
145
+ rubygems_version: 3.4.13
130
146
  signing_key:
131
147
  specification_version: 4
132
148
  summary: SAX/Stream style parse helpers