stream_parser 0.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b99a423c7791ac03b0e45b390d22c10b455b45e51066edb1912eb4c9aec7a65
4
- data.tar.gz: edb3c225aeaff8d4400e0929a8bde7120326ab95c39fe42c11c8d4bf6f8ba2ee
3
+ metadata.gz: 1467f9df4e5b2bcf033a774a97ea24f61635a8249be9a1c7daad54d77251ede6
4
+ data.tar.gz: dbb178a552f4d194b25a1df946bd10ae546225bc140ce5047695370310562937
5
5
  SHA512:
6
- metadata.gz: 24ae770c7090a31b80abc9b7c216de440e61ca38d87476e17ef855ba37aa90a30bd1cb7f990831b0ebd68fead35a385d98c7307b608e9f34759d54dfcfd43fe3
7
- data.tar.gz: 2ce893bdd9e79aa3b094ba15bc612ac598f1e11b0325bf5fa3e44a521ff678ada20705e805cef40314eeb4a274d7e877603b5dfc4ee6ffbebc96b23980e370aa
6
+ metadata.gz: faadb8fe5f2706851da234ff47e7a6d01f78a13cc06f663d213e31cf179c35773ef30ae483619070822dad456500e66e1b5a22ba98eb4afb5935bdb6f8fd715f
7
+ data.tar.gz: 56ace2ac66d7ccdbf217d1e21250cc3d91f54320501a7d75232f613e49fb3d8920eddef54eeb6f20797c9cf5e57807256119a4fc92c0f5676624a25ef556cde3
data/README.md CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
55
55
  # => SyntaxError "Unbalanced Quotes in string"
56
56
  ```
57
57
 
58
+ Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
59
+ `StreamParser::HTML` module, which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag` and others.
58
60
 
59
61
 
@@ -0,0 +1,39 @@
1
module StreamParser
  module HTML
    # A single HTML tag: its name, its attributes and whether it is a closing
    # (`</a>`) and/or self-closing (`<br />`) tag.
    #
    # Attributes are stored in a Hash keyed by Symbol; `[]`, `[]=` and `match`
    # accept either String or Symbol keys.
    class Tag
      attr_reader :name, :attributes
      attr_writer :self_closing

      # @param name [String] the tag name
      # @param closing [Boolean] true when this is a closing tag
      def initialize(name, closing=false)
        @name = name
        @attributes = {}
        @closing = closing
        @self_closing = false
      end

      # Reads an attribute value.
      #
      # @param key [String, Symbol] attribute name (symbolized before lookup)
      # @return the stored value, or nil when the attribute is absent
      def [](key)
        @attributes[key.to_sym]
      end

      # Stores an attribute value.
      #
      # @param key [String, Symbol] attribute name (symbolized before storing)
      # @param value the value to store
      def []=(key, value)
        @attributes[key.to_sym] = value
      end

      # @return [Boolean] the flag set through `self_closing=`
      def self_closing?
        @self_closing
      end

      # @return [Boolean] the `closing` flag given to the constructor
      def closing?
        @closing
      end

      # @return [Boolean] true when the tag is not a closing tag
      def opening?
        !@closing
      end

      # Tests the tag against the given criteria. Criteria left as nil are
      # not checked.
      #
      # @param name [String, nil] required tag name
      # @param closing [Boolean, nil] required value of the closing flag
      # @param attributes [Hash, nil] attribute values that must all be equal
      #   to the tag's own; keys may be Strings or Symbols
      # @return [Boolean]
      def match(name: nil, closing: nil, attributes: nil)
        return false if name && @name != name
        return false if !closing.nil? && @closing != closing
        # Look up through #[] so keys are symbolized the same way they were
        # when stored; a raw Hash lookup would never match String keys.
        return false if attributes && !attributes.all? { |key, value| self[key] == value }

        true
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
module StreamParser
  # HTML scanning helpers built on the StreamParser primitives (`scan_until`,
  # `gobble`, `peek`, `next?`, ...). Including this module also includes
  # StreamParser itself.
  module HTML

    autoload :Tag, File.expand_path('../html/tag', __FILE__)

    # Pulls StreamParser into any class that includes StreamParser::HTML.
    def self.included(base)
      base.include(StreamParser)
    end

    # Scans forward to the next tag, skipping `<!-- ... -->` comments.
    #
    # @param old_index [Integer, nil] value to record in @old_index once the
    #   tag has been read; defaults to the current position
    # @return [Tag, nil] the tag, or nil when no further tag can be found
    def next_tag(old_index: nil)
      old_index ||= @index
      return unless scan_until(/<\s*/)
      # NOTE(review): assumes the match was a bare "<" with no trailing
      # whitespace; otherwise this points past the "<" -- confirm.
      start_index = @index-1

      # Skip comments. Stop when a comment is unterminated or nothing follows
      # it, instead of building a tag from leftover text or a stale match.
      while peek(3) == '!--'
        forward(3)
        return unless scan_until(/-->\s*/)
        return unless scan_until(/<\s*/)
      end

      closing = peek(1) == '/'
      return unless scan_until(/[^>\s\/]+/)

      scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
    end

    # Scans forward until a tag matching the given criteria is found.
    #
    # @param name [String, nil] tag name to look for
    # @param closing [Boolean, nil] restrict to closing (true) or opening
    #   (false) tags; nil accepts both
    # @param attributes [Hash] attribute values the tag must have
    # @return [Tag, nil] the first matching tag, or nil when none is found
    def scan_for_tag(name, closing: nil, **attributes)
      old_index = @index
      tag = next_tag(old_index: old_index)
      while tag && !tag.match(name: name, closing: closing, attributes: attributes)
        tag = next_tag(old_index: old_index)
      end
      tag
    end

    # Scans forward to the closing tag that ends the current nesting level.
    # Opening tags met on the way are tracked so their own closing tags are
    # skipped; self-closing tags are ignored.
    #
    # @return [Tag, nil] the closing tag, or nil when the source runs out
    def scan_for_closing_tag
      old_index = @index
      open_tags = []

      tag = next_tag(old_index: old_index)
      while tag && !(tag.closing? && open_tags.empty?)
        unless tag.self_closing?
          if tag.closing?
            open_tags.pop
          else
            open_tags << tag
          end
        end
        tag = next_tag(old_index: old_index)
      end
      @old_index = old_index
      tag
    end

    # Reads the remainder of a tag (attributes and terminator) after its name
    # has been consumed.
    #
    # On return @old_index is set to +old_index+ and @match to the source
    # text from +start_index+ up to the current position.
    #
    # @param name [String] the tag name
    # @param closing [Boolean] whether the tag is a closing tag
    # @param old_index [Integer] value to store in @old_index
    # @param start_index [Integer] source offset where the tag text begins
    # @return [Tag] the tag; when the source ends before `>` the tag read so
    #   far is returned
    def scan_tag(name, closing: false, old_index:, start_index:)
      tag = Tag.new(name, closing)

      while !eos?
        gobble(/\s+/)
        key = case peek(1)
        when '>'
          forward(1)
          @old_index = old_index
          @match = @source[start_index...@index]
          return tag
        when '/'
          forward(1)
          gobble(/\s*\>/)
          @old_index = old_index
          @match = @source[start_index...@index]
          tag.self_closing = true
          return tag
        when '"', "'"
          quote_char = next_char
          forward(1)
          quoted_value(quote_char)
        else
          token = scan_until(/[^>\s\/=]+/)
          # Nothing left to read (e.g. trailing whitespace at the end of the
          # source): treat the tag as unterminated rather than raising.
          break unless token
          token[0]
        end

        tag[key] = if next?(/\s*=/)
          gobble(/\s*=/)
          html_tag_value
        else
          true
        end
      end

      @old_index = old_index
      @match = @source[start_index...@index]
      tag
    end

    # Reads an attribute value at the current position, either quoted or
    # bare.
    #
    # @return [String, nil] the value, or nil when no bare value is found
    def html_tag_value
      gobble(/\s+/)
      case peek(1)
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        token = scan_until(/[^>\s\/=]+/)
        token && token[0]
      end
    end

    # Scans forward to the next closing tag with the given name.
    #
    # @param name [String, Symbol] tag name, matched literally
    # @return the result of `scan_until` for the closing tag
    def next_end_tag(name)
      scan_until(/<\/\s*#{Regexp.escape(name.to_s)}\s*>/)
    end

  end
end
@@ -0,0 +1 @@
1
# Error raised by StreamParser when the text being parsed is malformed, for
# example when a quoted value has unbalanced quotes.
class StreamParser::SyntaxError < StandardError
end
@@ -1,3 +1,3 @@
1
1
  module StreamParser
2
- VERSION = '0.2'
2
+ VERSION = '0.4'
3
3
  end
data/lib/stream_parser.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  module StreamParser
2
2
 
3
+ autoload :HTML, 'stream_parser/html'
4
+ autoload :SyntaxError, 'stream_parser/syntax_error'
5
+
3
6
  def self.included(base)
4
7
  base.extend ClassMethods
5
8
  end
@@ -29,8 +32,8 @@ module StreamParser
29
32
 
30
33
  def scan_until(r)
31
34
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
32
- index = @source.index(r, @index)
33
35
  match = @source.match(r, @index)
36
+ index = match&.begin(0)
34
37
 
35
38
  if match
36
39
  @match = match.to_s
@@ -43,6 +46,13 @@ module StreamParser
43
46
  end
44
47
  match
45
48
  end
49
+
50
# Consumes the text matched by +r+, but only when the match begins exactly at
# the current position; otherwise nothing is consumed.
#
# A String pattern is matched literally. It is escaped here, the same way
# scan_until escapes it, so the position check and the consuming scan agree
# on what is being matched.
#
# @param r [Regexp, String] pattern to consume
# @return the result of scan_until when text was consumed, otherwise nil
def gobble(r)
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
  scan_until(r) if @source.match(r, @index)&.begin(0) == @index
end
46
56
 
47
57
  def pre_match
48
58
  @source[@old_index...(@index-(@match&.size || 0))]
@@ -70,6 +80,18 @@ module StreamParser
70
80
  @source[@index-1]
71
81
  end
72
82
 
83
# Reports whether +r+ matches starting exactly at the current position,
# without consuming anything.
#
# A String pattern is matched literally (escaped), consistent with how
# scan_until treats Strings.
#
# @param r [Regexp, String] pattern to test
# @return [Boolean]
def next?(r)
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
  @source.match(r, @index)&.begin(0) == @index
end
86
+
87
# Looks ahead from the current position without moving it.
#
# @param n [Integer, Regexp] number of characters to return, or a pattern to
#   match against the source starting at the current position
# @return [String, MatchData, nil] the next +n+ characters for an Integer;
#   the match result (nil when there is none) for a Regexp
def peek(n=1)
  case n
  when Regexp
    @source.match(n, @index)
  else
    @source[@index, n]
  end
end
94
+
73
95
  def next_word
74
96
  nw = @source.match(/\s*(\S+)/, @index)
75
97
  nw.nil? ? nil : nw[1]
@@ -107,7 +129,7 @@ module StreamParser
107
129
  ret_value = ""
108
130
  while scan_until(/(#{quote_char}|\Z)/)
109
131
  if match != quote_char
110
- raise Net::HTTPHeaderSyntaxError.new("Invalid Set-Cookie header format: unbalanced quotes (#{quote_char})")
132
+ raise StreamParser::SyntaxError.new("Unbalanced quotes #{quote_char}")
111
133
  elsif !escape_chars.include?(pre_match[-1])
112
134
  ret_value << pre_match
113
135
  return ret_value
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Bracy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-07 00:00:00.000000000 Z
11
+ date: 2024-04-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -118,6 +118,9 @@ extra_rdoc_files:
118
118
  files:
119
119
  - README.md
120
120
  - lib/stream_parser.rb
121
+ - lib/stream_parser/html.rb
122
+ - lib/stream_parser/html/tag.rb
123
+ - lib/stream_parser/syntax_error.rb
121
124
  - lib/stream_parser/version.rb
122
125
  homepage: https://github.com/malomalo/stream_parser
123
126
  licenses:
@@ -140,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
143
  - !ruby/object:Gem::Version
141
144
  version: '0'
142
145
  requirements: []
143
- rubygems_version: 3.2.3
146
+ rubygems_version: 3.5.4
144
147
  signing_key:
145
148
  specification_version: 4
146
149
  summary: SAX/Stream style parse helpers