rLexer 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9e539be93197746639346f23cbe8f1ef3ebf919be3936970071c0514cacbf311
4
+ data.tar.gz: abeab5b45dafbca62bacc24b02b6a4b262d84d02cbd4965c8f8bc93b0ce4b281
5
+ SHA512:
6
+ metadata.gz: 0152746babc3149ebc2f6efee3ec20f9753c0567a04e695b63d2326ab0318f7d32e0cb3a77f0bb341a7540210fcb57c5537434e96f7f054a7bd504eac393dc03
7
+ data.tar.gz: f5912634d0b97eafe9924bb5f50a0c1f72587f0e159999ceb0def40ef5a5488acf71373b8f866bb3febf68422e89ced67eab9e4daaeca2e85986fb673a75eddb
@@ -0,0 +1,12 @@
1
+
2
# Literal HTML markers referenced by the tokenizer.
#
# Each marker is exposed as its own constant; IDENTIFIERS collects all of them
# in declaration order. The strings and the list are frozen so that no caller
# can mutate these shared constants in place.
class Tags
  IDENTIFIERS = [
    OPEN_TAG = '<'.freeze,
    CLOSE_TAG = '>'.freeze,
    SELF_CLOSING_TAG = '/>'.freeze,
    CLOSING_TAG = '</'.freeze,
    START_COMMENT = '<!--'.freeze,
    END_COMMENT = '-->'.freeze,
    ATTRIBUTE_ASSIGNMENT = '='.freeze
  ].freeze
end
@@ -0,0 +1,150 @@
1
+ require_relative 'tags'
2
+
3
# Scans an HTML string and produces a flat list of +[type, value]+ tokens.
#
# Token types pushed by this class are :OPEN, :CLOSE, :COMMENT and :DATA, plus
# the :ATTRIBUTES entries that #consume_attributes inserts after opening tags.
class Tokenizer
  # :Tags is never assigned anywhere in this class; the accessor is retained
  # only so existing callers of the generated methods keep working.
  attr_accessor :html, :type, :Tags

  # @param html [String] markup to tokenize; every double quote is replaced by
  #   a single quote before scanning.
  def initialize(html)
    @html = html.tr('"', "'")
    @type = :EOF
    @tokens = []
  end

  # Walks the input one character at a time, emitting a token whenever a '<'
  # or '>' is met outside a comment, then splits attributes out of the opening
  # tags.
  #
  # @return [Array] whatever #consume_attributes returns.
  def tokenize
    @html.each_char.with_index do |ch, idx|
      comment_end(idx)
      next if @type == :COMMENT

      process(idx) if open_tag?(ch) || close_tag?(ch)
    end
    consume_attributes
  end

  # Classifies the marker at +idx+ and emits the matching token.
  def process(idx)
    set_type(idx)
    consume(idx)
  end

  # Dispatches to the consumer for the current @type. :DOCTYPE (and any other
  # type) produces no token.
  def consume(idx)
    case @type
    when :COMMENT      then consume_comment(idx)
    when :OPEN, :CLOSE then consume_tag(idx)
    when :DATA         then consume_data(idx)
    end
  end

  # Updates @type according to the marker found at +idx+. The order matters:
  # the longer markers ('<!--', '</') are tested before the bare '<'.
  # @type is left untouched when nothing matches.
  def set_type(idx)
    if comment_start?(idx)
      @type = :COMMENT
    elsif end_tag?(idx)
      @type = :CLOSE
    elsif doctype?(idx)
      @type = :DOCTYPE
    elsif close_tag?(current_char(idx)) || comment_end?(idx)
      @type = :DATA
    elsif open_tag?(current_char(idx))
      @type = :OPEN
    end
  end

  # Appends +slice+ to the token list, tagged with the current @type.
  def set_token(slice)
    @tokens.push([@type, slice])
  end

  # Emits the text between '<!--' (at +idx+) and the next '-->', or up to the
  # end of input when the comment is never closed.
  def consume_comment(idx)
    rest = @html[idx..-1]
    set_token(rest[Tags::START_COMMENT.length..end_comment_index(rest)])
  end

  # Emits the text between the tag opener at +idx+ and the next '>'.
  # When no '>' follows, the rest of the input is used instead of raising,
  # mirroring how an unterminated comment is handled.
  def consume_tag(idx)
    rest = @html[idx..-1]
    close_at = rest.index(Tags::CLOSE_TAG)
    last = close_at ? close_at - 1 : -1
    set_token(rest[tag_index(rest)..last])
  end

  # Reduces every tag token to its name and inserts an :ATTRIBUTES token right
  # after each opening tag that carried attributes.
  #
  # The attribute text is split on "' ", so every attribute except the last
  # one loses its closing quote; this is kept as-is for compatibility.
  #
  # @return [Array<Array>] the collected +[token_index, attributes]+ pairs.
  def consume_attributes
    attribute_lists = []
    @tokens.each_with_index do |token, i|
      type, value = token
      # Comments and text are kept verbatim.
      next if type == :COMMENT || type == :DATA
      # Values already reduced by an earlier pass (nil names, attribute
      # arrays) have nothing left to split.
      next unless value.is_a?(String)

      name, *attrs = value.split(' ')
      if type == :OPEN && !attrs.empty?
        attribute_lists.push([i, attrs.join(' ').split("' ")])
      end
      token[1] = name
    end

    # Each earlier insertion shifts the following tokens right by one.
    attribute_lists.each_with_index do |(position, attrs), offset|
      @tokens.insert(position + offset + 1, [:ATTRIBUTES, attrs])
    end
  end

  # Emits the stripped text that follows the '>' at +idx+, up to the next '<'
  # or, when no further tag exists, up to the end of input. Nothing is emitted
  # when the text is blank or when '>' is directly followed by '<' or the end
  # of input.
  def consume_data(idx)
    return if next_char?(idx)

    rest = @html[idx..-1]
    open_at = rest.index(Tags::OPEN_TAG)
    last = open_at ? open_at - 1 : -1
    text = rest[Tags::CLOSE_TAG.length..last].strip

    set_token(text) unless text.empty?
  end

  # @return [String, nil] the character at +idx+.
  def current_char(idx)
    @html[idx]
  end

  # @param html [String] text starting at a comment opener.
  # @return [Integer] index of the last character before '-->', or -1 (end of
  #   string) when the comment is never closed.
  def end_comment_index(html)
    idx = html.index(Tags::END_COMMENT)
    idx.nil? ? -1 : idx - 1
  end

  # @param html [String] unused; kept for signature compatibility.
  # @return [Integer] length of the opener to skip: '<' for :OPEN, '</'
  #   otherwise.
  def tag_index(html)
    @type == :OPEN ? Tags::OPEN_TAG.length : Tags::CLOSING_TAG.length
  end

  # While inside a comment, re-classifies at +idx+ once '-->' starts there.
  def comment_end(idx)
    return unless @type == :COMMENT

    set_type(idx) if comment_end?(idx)
  end

  # @return [Boolean] whether '-->' starts at +idx+.
  def comment_end?(idx)
    suitable?(idx, Tags::END_COMMENT)
  end

  # @return [Boolean] whether the character after +idx+ is '<' or missing.
  def next_char?(idx)
    following = @html[idx + 1]
    following.nil? || following == Tags::OPEN_TAG
  end

  # @return [Boolean] whether '</' starts at +idx+.
  def end_tag?(idx)
    suitable?(idx, Tags::CLOSING_TAG)
  end

  # Doctype detection is not implemented; always false.
  def doctype?(idx)
    false
  end

  # @return [Boolean] whether '<!--' starts at +idx+.
  def comment_start?(idx)
    suitable?(idx, Tags::START_COMMENT)
  end

  # @return [Boolean] whether +tag+ occurs at character index +idx+.
  #   Character (not byte) slicing is required because callers pass indices
  #   obtained from each_char.with_index.
  def suitable?(idx, tag)
    tag == @html[idx, tag.length]
  end

  # @return [Boolean] whether +char+ is '<'.
  def open_tag?(char)
    char == Tags::OPEN_TAG
  end

  # @return [Boolean] whether +char+ is '>'.
  def close_tag?(char)
    char == Tags::CLOSE_TAG
  end

  # @return [Array<Array>] the tokens collected so far.
  def tokens
    @tokens
  end
end
data/lib/rLexer.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'rLexer/tokenizer'
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rLexer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.12
5
+ platform: ruby
6
+ authors:
7
+ - Robert Holland
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-09-07 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - rlexerdevelopment@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/rLexer.rb
21
+ - lib/rLexer/tags.rb
22
+ - lib/rLexer/tokenizer.rb
23
+ homepage: https://github.com/whollandr94/rLexer
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubygems_version: 3.1.2
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: A simple HTML lexer/tokenizer written in Ruby.
46
+ test_files: []