stream_parser 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b99a423c7791ac03b0e45b390d22c10b455b45e51066edb1912eb4c9aec7a65
4
- data.tar.gz: edb3c225aeaff8d4400e0929a8bde7120326ab95c39fe42c11c8d4bf6f8ba2ee
3
+ metadata.gz: 3cc1101615985f944d1a8f88cd37509d985860be8950fed026bd75399184ce2d
4
+ data.tar.gz: c5088acc9378bb08a988a4d53c6fc3d9ec0d92ddfb20a4484a8c5b83e5ce6031
5
5
  SHA512:
6
- metadata.gz: 24ae770c7090a31b80abc9b7c216de440e61ca38d87476e17ef855ba37aa90a30bd1cb7f990831b0ebd68fead35a385d98c7307b608e9f34759d54dfcfd43fe3
7
- data.tar.gz: 2ce893bdd9e79aa3b094ba15bc612ac598f1e11b0325bf5fa3e44a521ff678ada20705e805cef40314eeb4a274d7e877603b5dfc4ee6ffbebc96b23980e370aa
6
+ metadata.gz: 8519f3bbcd0dc82e16fb52d366f893fc2465720ac626aa7822604a294c55baed11567bf62ae4b2f6971de0df8f67c2b5722413f1fdd8a86466b894810ef105ec
7
+ data.tar.gz: 7c5888f18e4dd6f65d86d117826294bfa4d3aac80a9c14c529764ac93243beb06fbef751107374818b5497fd9e571cfeab5f585c4012cf6763b440d5d23c8885
data/README.md CHANGED
@@ -55,5 +55,7 @@ QuotedStringFinder.parse(%q{Here "ar})
55
55
  # => SyntaxError "Unbalanced Quotes in string"
56
56
  ```
57
57
 
58
+ Although we grab quoted values ourselves in this example, there is a `quoted_value` helper as well as a
59
+ `StreamParser::HTML` module, which provides additional helpers such as `next_tag`, `scan_for_tag`, `next_end_tag`, and others.
58
60
 
59
61
 
@@ -0,0 +1,39 @@
1
# A single HTML tag: its name, its attributes, and flags recording whether it
# is a closing tag and/or a self-closing tag.
class StreamParser::HTML::Tag
  attr_writer :self_closing
  attr_reader :name, :attributes

  # name    - the tag name
  # closing - truthy when this is a closing tag (defaults to false)
  #
  # A new tag starts with no attributes and is not self-closing.
  def initialize(name, closing=false)
    @name, @closing = name, closing
    @self_closing = false
    @attributes = {}
  end

  # Reads an attribute. The key is converted with #to_sym before the lookup.
  def [](key)
    attributes[key.to_sym]
  end

  # Stores an attribute. The key is converted with #to_sym before it is stored.
  def []=(key, value)
    attributes[key.to_sym] = value
  end

  # Whether the tag was flagged as self-closing via #self_closing=.
  def self_closing?
    @self_closing
  end

  # The closing flag given to the constructor.
  def closing?
    @closing
  end

  # Negation of the closing flag.
  def opening?
    !@closing
  end

  # Tests this tag against the given criteria; criteria left out are ignored.
  #
  # name       - when truthy, must equal the tag's name
  # closing    - when not nil, must equal the tag's closing flag
  # attributes - when truthy, every key/value pair must equal the stored
  #              attribute looked up under that exact key
  #
  # Returns true when every supplied criterion holds, false otherwise.
  def match(name: nil, closing: nil, attributes: nil)
    (!name || @name == name) &&
      (closing.nil? || @closing == closing) &&
      (!attributes || attributes.all? { |key, expected| @attributes[key] == expected })
  end
end
@@ -0,0 +1,112 @@
1
# Mixin adding HTML tag scanning helpers on top of StreamParser.
#
# Including this module also includes StreamParser. The helpers below drive
# the parser's cursor state (@source, @index, @old_index, @match) through
# calls such as scan_until, gobble, peek, forward, next? and eos?.
module StreamParser::HTML

  autoload :Tag, File.expand_path('../html/tag', __FILE__)

  # Hook run on `include StreamParser::HTML`; pulls StreamParser into the
  # including class as well.
  def self.included(base)
    base.include(StreamParser)
  end

  # Advances to the next tag in the source, skipping `<!-- ... -->` comments.
  #
  # old_index - value stored in @old_index once the tag has been parsed;
  #             defaults to the cursor position on entry.
  #
  # Returns the parsed Tag, or nil when no further tag can be found.
  def next_tag(old_index: nil)
    old_index ||= @index
    return unless scan_until(/<\s*/)
    # scan_until leaves the matched text in @match. Step back over all of it
    # (the "<" plus any whitespace after it) so the slice later stored in
    # @match starts at the "<" itself.
    start_index = @index - @match.size

    while peek(3) == '!--'
      forward(3)
      # An unterminated comment, or a comment that is the last markup in the
      # source, leaves no tag to return.
      return unless scan_until(/-->\s*/) && scan_until(/<\s*/)
    end

    # HTMLComment.new(pre_match)
    closing = peek(1) == '/'
    # The name pattern excludes "/", so this scan also steps over the slash
    # of a closing tag.
    return unless scan_until(/[^>\s\/]+/)
    scan_tag(match, old_index: old_index, start_index: start_index, closing: closing)
  end

  # Scans forward until a tag satisfying Tag#match is found.
  #
  # name       - tag name to look for
  # closing    - when not nil, the closing flag the tag must have
  # attributes - attribute values the tag must carry
  #
  # Returns the matching Tag, or nil when the tags run out first.
  def scan_for_tag(name, closing: nil, **attributes)
    old_index = @index
    tag = next_tag(old_index: old_index)
    while tag && !tag.match(name: name, closing: closing, attributes: attributes)
      tag = next_tag(old_index: old_index)
    end
    tag
  end

  # Scans forward for the closing tag that ends the current nesting level.
  # Opening tags met on the way are pushed on a stack and each closing tag
  # pops one entry (tag names are not compared); the first closing tag seen
  # while the stack is empty is the result.
  #
  # Returns that Tag, or nil when the tags run out first.
  def scan_for_closing_tag
    old_index = @index
    open_tags = []

    tag = next_tag
    while tag && !(tag.closing? && open_tags.empty?)
      if !tag.closing? && !tag.self_closing?
        open_tags << tag
      elsif !tag.self_closing?
        open_tags.pop
      end
      tag = next_tag(old_index: old_index)
    end
    @old_index = old_index
    tag
  end

  # Parses the remainder of a tag (attributes up to ">" or "/>") once its
  # name has been read.
  #
  # name        - the tag name
  # closing     - whether the tag is a closing tag
  # old_index   - value to store in @old_index when parsing finishes
  # start_index - offset in @source where the tag text begins; @match is set
  #               to the source from there up to the cursor
  #
  # Returns the Tag. If the source ends before the tag is terminated, the
  # tag parsed so far is returned.
  def scan_tag(name, closing: false, old_index:, start_index:)
    tag = Tag.new(name, closing)

    until eos?
      gobble(/\s+/)
      # Trailing whitespace may have been the last thing in the source.
      break if eos?

      key = case peek(1)
      when '>'
        forward(1)
        break
      when '/'
        forward(1)
        gobble(/\s*\>/)
        tag.self_closing = true
        break
      when '"', "'"
        quote_char = next_char
        forward(1)
        quoted_value(quote_char)
      else
        found = scan_until(/[^>\s\/=]+/)
        # No attribute name left to read (e.g. a stray "=" before the end).
        break unless found
        found[0]
      end

      tag[key] = if next?(/\s*=/)
        gobble(/\s*=/)
        html_tag_value
      else
        true
      end
    end

    @old_index = old_index
    @match = @source[start_index...@index]
    tag
  end

  # Reads an attribute value at the cursor: a quoted value when the next
  # character is a quote, otherwise the next run of characters that are not
  # ">", "/", "=" or whitespace.
  #
  # Returns the value, or nil when no unquoted value can be found.
  def html_tag_value
    gobble(/\s+/)
    case peek(1)
    when '"', "'"
      quote_char = next_char
      forward(1)
      quoted_value(quote_char)
    else
      found = scan_until(/[^>\s\/=]+/)
      found && found[0]
    end
  end

  # Advances past the next end tag with the given name (e.g. "</li>").
  #
  # name - tag name; it is escaped, so it is matched literally
  #
  # Returns whatever scan_until returns for the end-tag pattern.
  def next_end_tag(name)
    scan_until(/<\/\s*#{Regexp.escape(name.to_s)}>/)
  end

end
@@ -1,3 +1,3 @@
1
1
  module StreamParser
2
- VERSION = '0.2'
2
+ VERSION = '0.3'
3
3
  end
data/lib/stream_parser.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module StreamParser
2
2
 
3
+ autoload :HTML, File.expand_path('../stream_parser/html', __FILE__)
4
+
3
5
  def self.included(base)
4
6
  base.extend ClassMethods
5
7
  end
@@ -29,8 +31,8 @@ module StreamParser
29
31
 
30
32
  def scan_until(r)
31
33
  r = Regexp.new(Regexp.escape(r)) if r.is_a?(String)
32
- index = @source.index(r, @index)
33
34
  match = @source.match(r, @index)
35
+ index = match&.begin(0)
34
36
 
35
37
  if match
36
38
  @match = match.to_s
@@ -43,6 +45,13 @@ module StreamParser
43
45
  end
44
46
  match
45
47
  end
48
+
49
+ def gobble(r)
50
+ m = @source.match(r, @index)
51
+ if m&.begin(0) == @index
52
+ scan_until(r)
53
+ end
54
+ end
46
55
 
47
56
  def pre_match
48
57
  @source[@old_index...(@index-(@match&.size || 0))]
@@ -70,6 +79,18 @@ module StreamParser
70
79
  @source[@index-1]
71
80
  end
72
81
 
82
+ def next?(r)
83
+ @source.match(r, @index)&.begin(0) == @index
84
+ end
85
+
86
+ def peek(n=1)
87
+ if n.is_a?(Regexp)
88
+ @source.match(n, @index)
89
+ else
90
+ @source.slice(@index, n)
91
+ end
92
+ end
93
+
73
94
  def next_word
74
95
  nw = @source.match(/\s*(\S+)/, @index)
75
96
  nw.nil? ? nil : nw[1]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Bracy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-07 00:00:00.000000000 Z
11
+ date: 2023-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -118,6 +118,8 @@ extra_rdoc_files:
118
118
  files:
119
119
  - README.md
120
120
  - lib/stream_parser.rb
121
+ - lib/stream_parser/html.rb
122
+ - lib/stream_parser/html/tag.rb
121
123
  - lib/stream_parser/version.rb
122
124
  homepage: https://github.com/malomalo/stream_parser
123
125
  licenses:
@@ -140,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
142
  - !ruby/object:Gem::Version
141
143
  version: '0'
142
144
  requirements: []
143
- rubygems_version: 3.2.3
145
+ rubygems_version: 3.4.13
144
146
  signing_key:
145
147
  specification_version: 4
146
148
  summary: SAX/Stream style parse helpers