stream_parser 0.2 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b99a423c7791ac03b0e45b390d22c10b455b45e51066edb1912eb4c9aec7a65
4
- data.tar.gz: edb3c225aeaff8d4400e0929a8bde7120326ab95c39fe42c11c8d4bf6f8ba2ee
3
+ metadata.gz: 1467f9df4e5b2bcf033a774a97ea24f61635a8249be9a1c7daad54d77251ede6
4
+ data.tar.gz: dbb178a552f4d194b25a1df946bd10ae546225bc140ce5047695370310562937
5
5
  SHA512:
6
- metadata.gz: 24ae770c7090a31b80abc9b7c216de440e61ca38d87476e17ef855ba37aa90a30bd1cb7f990831b0ebd68fead35a385d98c7307b608e9f34759d54dfcfd43fe3
7
- data.tar.gz: 2ce893bdd9e79aa3b094ba15bc612ac598f1e11b0325bf5fa3e44a521ff678ada20705e805cef40314eeb4a274d7e877603b5dfc4ee6ffbebc96b23980e370aa
6
+ metadata.gz: faadb8fe5f2706851da234ff47e7a6d01f78a13cc06f663d213e31cf179c35773ef30ae483619070822dad456500e66e1b5a22ba98eb4afb5935bdb6f8fd715f
7
+ data.tar.gz: 56ace2ac66d7ccdbf217d1e21250cc3d91f54320501a7d75232f613e49fb3d8920eddef54eeb6f20797c9cf5e57807256119a4fc92c0f5676624a25ef556cde3
data/README.md CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
55
55
  # => SyntaxError "Unbalanced Quotes in string"
56
56
  ```
57
57
 
58
+ Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
59
+ `StreamParser::HTML` module, which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag`, and others.
58
60
 
59
61
 
@@ -0,0 +1,39 @@
1
+ class StreamParser::HTML::Tag
2
+ attr_reader :name, :attributes
3
+ attr_writer :self_closing
4
+
5
+ def initialize(name, closing=false)
6
+ @name = name
7
+ @attributes = {}
8
+ @closing = closing
9
+ @self_closing = false
10
+ end
11
+
12
+ def [](key)
13
+ @attributes[key.to_sym]
14
+ end
15
+
16
+ def []=(key, value)
17
+ @attributes[key.to_sym] = value
18
+ end
19
+
20
+ def self_closing?
21
+ @self_closing
22
+ end
23
+
24
+ def closing?
25
+ @closing
26
+ end
27
+
28
+ def opening?
29
+ !@closing
30
+ end
31
+
32
+ def match(name: nil, closing: nil, attributes: nil)
33
+ return false if name && @name != name
34
+ return false if !closing.nil? && @closing != closing
35
+ return false if attributes && !attributes.all? { |k,v| @attributes[k] == v }
36
+
37
+ true
38
+ end
39
+ end
@@ -0,0 +1,112 @@
1
+ module StreamParser::HTML
2
+
3
+ autoload :Tag, File.expand_path('../html/tag', __FILE__)
4
+
5
+ def self.included(base)
6
+ base.include(StreamParser)
7
+ end
8
+
9
+ def next_tag(old_index: nil)
10
+ old_index ||= @index
11
+ return unless scan_until(/<\s*/)
12
+ start_index = @index-1
13
+
14
+ while peek(3) == '!--'
15
+ forward(3)
16
+ scan_until(/-->\s*/)
17
+ scan_until(/<\s*/)
18
+ end
19
+
20
+ # HTMLComment.new(pre_match)
21
+ if peek(1) == '/'
22
+ scan_until(/[^>\s\/]+/)
23
+ scan_tag(match, old_index: old_index, start_index: start_index, closing: true)
24
+ else
25
+ scan_until(/[^>\s\/]+/)
26
+ scan_tag(match, old_index: old_index, start_index: start_index)
27
+ end
28
+ end
29
+
30
+ def scan_for_tag(name, closing: nil, **attributes)
31
+ old_index ||= @index
32
+ tag = next_tag
33
+ while tag && !tag.match(name: name, closing: closing, attributes: attributes)
34
+ tag = next_tag(old_index: old_index)
35
+ end
36
+ tag
37
+ end
38
+
39
+ def scan_for_closing_tag
40
+ old_index = @index
41
+ heap = []
42
+
43
+ tag = next_tag
44
+ puts tag.inspect
45
+ while tag && !(tag.closing? && heap.empty?)
46
+ if !tag.closing? && !tag.self_closing?
47
+ heap << tag
48
+ elsif !tag.self_closing?
49
+ heap.pop
50
+ end
51
+ tag = next_tag(old_index: old_index)
52
+ end
53
+ @old_index = old_index
54
+ tag
55
+ end
56
+
57
+ def scan_tag(name, closing: false, old_index:, start_index:)
58
+ tag = Tag.new(name, closing)
59
+
60
+ while !eos?
61
+ gobble(/\s+/)
62
+ key = case peek(1)
63
+ when '>'
64
+ forward(1)
65
+ @old_index = old_index
66
+ @match = @source[start_index...@index]
67
+ return tag
68
+ when '/'
69
+ forward(1)
70
+ gobble(/\s*\>/)
71
+ @old_index = old_index
72
+ @match = @source[start_index...@index]
73
+ tag.self_closing = true
74
+ return tag
75
+ when '"', "'"
76
+ quote_char = next_char
77
+ forward(1)
78
+ quoted_value(quote_char)
79
+ else
80
+ scan_until(/[^>\s\/=]+/)[0]
81
+ end
82
+
83
+ tag[key] = if next?(/\s*=/)
84
+ gobble(/\s*=/)
85
+ html_tag_value
86
+ else
87
+ true
88
+ end
89
+ end
90
+
91
+ @old_index = old_index
92
+ @match = @source[start_index...@index]
93
+ tag
94
+ end
95
+
96
+ def html_tag_value
97
+ gobble(/\s+/)
98
+ case peek(1)
99
+ when '"', "'"
100
+ quote_char = next_char
101
+ forward(1)
102
+ quoted_value(quote_char)
103
+ else
104
+ scan_until(/[^>\s\/=]+/)[0]
105
+ end
106
+ end
107
+
108
+ def next_end_tag(name)
109
+ scan_until(/<\/\s*li>/)
110
+ end
111
+
112
+ end
@@ -0,0 +1 @@
1
+ class StreamParser::SyntaxError < StandardError; end
@@ -1,3 +1,3 @@
1
1
  module StreamParser
2
- VERSION = '0.2'
2
+ VERSION = '0.4'
3
3
  end
data/lib/stream_parser.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  module StreamParser
2
2
 
3
+ autoload :HTML, 'stream_parser/html'
4
+ autoload :SyntaxError, 'stream_parser/syntax_error'
5
+
3
6
  def self.included(base)
4
7
  base.extend ClassMethods
5
8
  end
@@ -29,8 +32,8 @@ module StreamParser
29
32
 
30
33
  def scan_until(r)
31
34
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
32
- index = @source.index(r, @index)
33
35
  match = @source.match(r, @index)
36
+ index = match&.begin(0)
34
37
 
35
38
  if match
36
39
  @match = match.to_s
@@ -43,6 +46,13 @@ module StreamParser
43
46
  end
44
47
  match
45
48
  end
49
+
50
+ def gobble(r)
51
+ m = @source.match(r, @index)
52
+ if m&.begin(0) == @index
53
+ scan_until(r)
54
+ end
55
+ end
46
56
 
47
57
  def pre_match
48
58
  @source[@old_index...(@index-(@match&.size || 0))]
@@ -70,6 +80,18 @@ module StreamParser
70
80
  @source[@index-1]
71
81
  end
72
82
 
83
+ def next?(r)
84
+ @source.match(r, @index)&.begin(0) == @index
85
+ end
86
+
87
+ def peek(n=1)
88
+ if n.is_a?(Regexp)
89
+ @source.match(n, @index)
90
+ else
91
+ @source.slice(@index, n)
92
+ end
93
+ end
94
+
73
95
  def next_word
74
96
  nw = @source.match(/\s*(\S+)/, @index)
75
97
  nw.nil? ? nil : nw[1]
@@ -107,7 +129,7 @@ module StreamParser
107
129
  ret_value = ""
108
130
  while scan_until(/(#{quote_char}|\Z)/)
109
131
  if match != quote_char
110
- raise Net::HTTPHeaderSyntaxError.new("Invalid Set-Cookie header format: unbalanced quotes (#{quote_char})")
132
+ raise StreamParser::SyntaxError.new("Unbalanced quotes #{quote_char}")
111
133
  elsif !escape_chars.include?(pre_match[-1])
112
134
  ret_value << pre_match
113
135
  return ret_value
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.4'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Bracy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-07 00:00:00.000000000 Z
11
+ date: 2024-04-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -118,6 +118,9 @@ extra_rdoc_files:
118
118
  files:
119
119
  - README.md
120
120
  - lib/stream_parser.rb
121
+ - lib/stream_parser/html.rb
122
+ - lib/stream_parser/html/tag.rb
123
+ - lib/stream_parser/syntax_error.rb
121
124
  - lib/stream_parser/version.rb
122
125
  homepage: https://github.com/malomalo/stream_parser
123
126
  licenses:
@@ -140,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
143
  - !ruby/object:Gem::Version
141
144
  version: '0'
142
145
  requirements: []
143
- rubygems_version: 3.2.3
146
+ rubygems_version: 3.5.4
144
147
  signing_key:
145
148
  specification_version: 4
146
149
  summary: SAX/Stream style parse helpers