rLexer 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/rLexer.rb +1 -1
  3. data/lib/rLexer/tokenizer.rb +155 -148
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e539be93197746639346f23cbe8f1ef3ebf919be3936970071c0514cacbf311
4
- data.tar.gz: abeab5b45dafbca62bacc24b02b6a4b262d84d02cbd4965c8f8bc93b0ce4b281
3
+ metadata.gz: 491a367e5dcdfe6c436552318c5f728f941cbd7049ef42ff45dd911fe19ed9a7
4
+ data.tar.gz: 377026255c729caa2df62e7099d5efd80a5d38d3e866f41ac185527a37936ed3
5
5
  SHA512:
6
- metadata.gz: 0152746babc3149ebc2f6efee3ec20f9753c0567a04e695b63d2326ab0318f7d32e0cb3a77f0bb341a7540210fcb57c5537434e96f7f054a7bd504eac393dc03
7
- data.tar.gz: f5912634d0b97eafe9924bb5f50a0c1f72587f0e159999ceb0def40ef5a5488acf71373b8f866bb3febf68422e89ced67eab9e4daaeca2e85986fb673a75eddb
6
+ metadata.gz: 70c49109ede9cee10204532819bb70d02b56a68d851924017ac59f15f5273705cc11d902abf96a0d0e44874f4fcf92a6240e39330232e3f2ec920ab1988be5ea
7
+ data.tar.gz: dc62f40fcb624f4e57bc06fd8104692ac4e92fc693542b02dc1891f5e4f1f7eb42134f643831857bc68be7a144b1e6340809edf019f618c5f8a09e52e8f4d3d7
data/lib/rLexer.rb CHANGED
@@ -1 +1 @@
1
- require_relative 'rLexer/tokenizer'
1
+ require_relative "rLexer/tokenizer"
data/lib/rLexer/tokenizer.rb CHANGED
@@ -1,150 +1,157 @@
1
1
  require_relative 'tags'
2
2
 
3
- class Tokenizer
4
- attr_accessor :html, :type, :Tags
5
-
6
- def initialize(html)
7
- @html = html.gsub('"', '\'')
8
- @type = :EOF
9
- @tokens = []
10
- end
11
-
12
- def tokenize
13
- @html.each_char.with_index do |ch, idx|
14
- comment_end(idx)
15
- next if @type == :COMMENT
16
- if open_tag?(ch) or close_tag?(ch)
17
- process(idx)
18
- end
19
- end
20
- consume_attributes
21
- end
22
-
23
- def process(idx)
24
- set_type(idx); consume(idx)
25
- end
26
-
27
- def consume(idx)
28
- if @type == :COMMENT
29
- consume_comment(idx)
30
- elsif @type == :OPEN or @type == :CLOSE
31
- consume_tag(idx)
32
- elsif @type == :DOCTYPE
33
- #consume_doctype(idx)
34
- elsif @type == :DATA
35
- consume_data(idx)
36
- end
37
- end
38
-
39
- def set_type(idx)
40
- if comment_start?(idx)
41
- @type = :COMMENT
42
- elsif end_tag?(idx)
43
- @type = :CLOSE
44
- elsif doctype?(idx)
45
- @type = :DOCTYPE
46
- elsif close_tag?(current_char(idx)) or comment_end?(idx)
47
- @type = :DATA
48
- elsif open_tag?(current_char(idx))
49
- @type = :OPEN
50
- end
51
- end
52
-
53
- def set_token(slice)
54
- @tokens.push([@type, slice])
55
- end
56
-
57
- def consume_comment(idx)
58
- slice = @html[idx..-1]
59
- slice = slice[Tags::START_COMMENT.length..end_comment_index(slice)]
60
- set_token(slice)
61
- end
62
-
63
- def consume_tag(idx)
64
- slice = @html[idx..-1]
65
- slice = slice[tag_index(slice)..slice.index(Tags::CLOSE_TAG) -1]
66
- set_token(slice)
67
- end
68
-
69
- def consume_attributes
70
- atts_new = []
71
- @tokens.each.with_index do |token, i|
72
- atts = token[1].split(' ')[1..-1]
73
- if token[0] == :OPEN and !atts[0].nil?
74
- atts_new.push([i, atts.join(' ').split("' ")])
75
- end
76
- @tokens[i][1] = @tokens[i][1].split(' ')[0] unless @tokens[i][0] == :COMMENT or @tokens[i][0] == :DATA
77
- end
78
- c = 1
79
- atts_new.each.with_index do |x|
80
- @tokens.insert(x[0] + c, [:ATTRIBUTES, x[1]])
81
- c += 1
82
- end
83
- end
84
-
85
- def consume_data(idx)
86
- return if next_char?(idx)
87
-
88
- slice = @html[idx..-1]
89
- slice = slice[Tags::CLOSE_TAG.length..slice.index(Tags::OPEN_TAG) -1]
90
- slice.strip!
91
-
92
- set_token(slice) unless slice == ''
93
- end
94
-
95
- def current_char(idx)
96
- @html[idx]
97
- end
98
-
99
- def end_comment_index(html)
100
- idx = html.index(Tags::END_COMMENT)
101
- (not idx.nil?) ? (idx + 2) - Tags::END_COMMENT.length : -1
102
- end
103
-
104
- def tag_index(html)
105
- (@type == :OPEN) ? Tags::OPEN_TAG.length : Tags::CLOSING_TAG.length
106
- end
107
-
108
- def comment_end(idx)
109
- return if not @type == :COMMENT
110
- if comment_end?(idx)
111
- set_type(idx)
112
- end
113
- end
114
-
115
- def comment_end?(idx)
116
- suitable?(idx, Tags::END_COMMENT)
117
- end
118
-
119
- def next_char?(idx)
120
- @html[idx +1] == Tags::OPEN_TAG or @html[idx +1].nil?
121
- end
122
-
123
- def end_tag?(idx)
124
- suitable?(idx, Tags::CLOSING_TAG)
125
- end
126
-
127
- def doctype?(idx)
128
- false
129
- end
130
-
131
- def comment_start?(idx)
132
- suitable?(idx, Tags::START_COMMENT)
133
- end
134
-
135
- def suitable?(idx, tag)
136
- tag == @html.byteslice(idx, tag.length)
137
- end
138
-
139
- def open_tag?(char)
140
- char == Tags::OPEN_TAG
141
- end
142
-
143
- def close_tag?(char)
144
- char == Tags::CLOSE_TAG
145
- end
146
-
147
- def tokens
148
- @tokens
149
- end
150
- end
3
+ class Tokenizer
4
+ attr_accessor :html, :type, :Tags
5
+
6
+ def initialize(html)
7
+ @html = html.gsub('"', '\'')
8
+ @html.strip!
9
+ @type = :EOF
10
+ @tokens = []
11
+ end
12
+
13
+ def tokenize
14
+ @html.each_char.with_index do |ch, idx|
15
+ comment_end(idx)
16
+ next if @type == :COMMENT
17
+ if open_tag?(ch) or close_tag?(ch)
18
+ process(idx)
19
+ end
20
+ end
21
+ consume_attributes
22
+ end
23
+
24
+ def process(idx)
25
+ set_type(idx); consume(idx)
26
+ end
27
+
28
+ def consume(idx)
29
+ if @type == :COMMENT
30
+ consume_comment(idx)
31
+ elsif @type == :OPEN or @type == :CLOSE
32
+ consume_tag(idx)
33
+ elsif @type == :DOCTYPE
34
+ #consume_doctype(idx)
35
+ elsif @type == :DATA
36
+ consume_data(idx)
37
+ end
38
+ end
39
+
40
+ def set_type(idx)
41
+ if comment_start?(idx)
42
+ @type = :COMMENT
43
+ elsif end_tag?(idx)
44
+ @type = :CLOSE
45
+ elsif doctype?(idx)
46
+ @type = :DOCTYPE
47
+ elsif close_tag?(current_char(idx)) or comment_end?(idx)
48
+ @type = :DATA
49
+ elsif open_tag?(current_char(idx))
50
+ @type = :OPEN
51
+ end
52
+ end
53
+
54
+ def set_token(slice)
55
+ @tokens.push([@type, slice])
56
+ end
57
+
58
+ def consume_comment(idx)
59
+ slice = @html[idx..-1]
60
+ slice = slice[Tags::START_COMMENT.length..end_comment_index(slice)]
61
+ set_token(slice)
62
+ end
63
+
64
+ def consume_tag(idx)
65
+ slice = @html[idx..-1]
66
+
67
+ if slice.index(Tags::CLOSE_TAG).nil?
68
+ index = -1
69
+ else
70
+ index = slice.index(Tags::CLOSE_TAG) - 1
71
+ end
72
+
73
+ slice = slice[tag_index(slice)..index]
74
+ set_token(slice)
75
+ end
76
+
77
+ def consume_attributes
78
+ atts_new = []
79
+ @tokens.each.with_index do |token, i|
80
+ atts = token[1].split(' ')[1..-1]
81
+ if token[0] == :OPEN and !atts[0].nil?
82
+ atts_new.push([i, atts.join(' ').split("' ")])
83
+ end
84
+ @tokens[i][1] = @tokens[i][1].split(' ')[0] unless @tokens[i][0] == :COMMENT or @tokens[i][0] == :DATA
85
+ end
86
+ c = 1
87
+ atts_new.each.with_index do |x|
88
+ @tokens.insert(x[0] + c, [:ATTRIBUTES, x[1]])
89
+ c += 1
90
+ end
91
+ end
92
+
93
+ def consume_data(idx)
94
+ return if next_char?(idx)
95
+
96
+ slice = @html[idx..-1]
97
+ slice = slice[Tags::CLOSE_TAG.length..slice.index(Tags::OPEN_TAG) || slice.length]
98
+
99
+ set_token(slice) unless slice == ''
100
+ end
101
+
102
+ def current_char(idx)
103
+ @html[idx]
104
+ end
105
+
106
+ def end_comment_index(html)
107
+ idx = html.index(Tags::END_COMMENT)
108
+ (not idx.nil?) ? (idx + 2) - Tags::END_COMMENT.length : -1
109
+ end
110
+
111
+ def tag_index(html)
112
+ (@type == :OPEN) ? Tags::OPEN_TAG.length : Tags::CLOSING_TAG.length
113
+ end
114
+
115
+ def comment_end(idx)
116
+ return if not @type == :COMMENT
117
+ if comment_end?(idx)
118
+ set_type(idx)
119
+ end
120
+ end
121
+
122
+ def comment_end?(idx)
123
+ suitable?(idx, Tags::END_COMMENT)
124
+ end
125
+
126
+ def next_char?(idx)
127
+ @html[idx +1] == Tags::OPEN_TAG or @html[idx +1].nil?
128
+ end
129
+
130
+ def end_tag?(idx)
131
+ suitable?(idx, Tags::CLOSING_TAG)
132
+ end
133
+
134
+ def doctype?(idx)
135
+ false
136
+ end
137
+
138
+ def comment_start?(idx)
139
+ suitable?(idx, Tags::START_COMMENT)
140
+ end
141
+
142
+ def suitable?(idx, tag)
143
+ tag == @html.byteslice(idx, tag.length)
144
+ end
145
+
146
+ def open_tag?(char)
147
+ char == Tags::OPEN_TAG
148
+ end
149
+
150
+ def close_tag?(char)
151
+ char == Tags::CLOSE_TAG
152
+ end
153
+
154
+ def tokens
155
+ @tokens
156
+ end
157
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rLexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Holland
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-07 00:00:00.000000000 Z
11
+ date: 2020-09-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: