src_lexer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/src_lexer/version.rb +1 -1
- data/lib/src_lexer.rb +119 -65
- data/spec/src_lexer_spec.rb +12 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
N2I2ZmY3ZjlhNjU3MTBiYzE1ZmMzMTYwMjk0MzEwNTU3MmE3NWVlYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZmQ2YjRkOTVkODhmODEyZDA4OWY2Y2JhN2M5Yzc3YjkxNzJjYmMzNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MTc4NmI5ZTM4MmZkNThlM2FiY2VkMDRiNTRjOWIzMzEzZjZlNzI2NGQ2NzU5
|
10
|
+
MTM5ZGQ3MTY5NmY5MzFiYjQ3NzNkNzhiMDY3NmM0MmY4ZDVlNDlmM2RiZmQy
|
11
|
+
NDdhYzk2YjI5MGU2MGQ3Yzc1YjQyYzVhYzE4MTJiMzAyYTI0Mzc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NTcyOWE4NjlmODE2ODMxMWEwMzlmZTJmNmZkNWVhYTcwNDEyODIzZjkwYTE2
|
14
|
+
MDYyNjk5MzkxZGVkZTEzZDkzMmRhOTNlZjE2MjA3Y2UzNTkzNTc1N2JiZmRm
|
15
|
+
NTgxNjQ1MGNmOWQxZmE4ODRkYjAxODVlNTk3ZTQ4ZjNjNTJjODg=
|
data/lib/src_lexer/version.rb
CHANGED
data/lib/src_lexer.rb
CHANGED
@@ -18,13 +18,16 @@ module SrcLexer
|
|
18
18
|
|
19
19
|
class Lexer
|
20
20
|
END_TOKEN = [false, nil]
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
@
|
27
|
-
@
|
21
|
+
NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
|
22
|
+
STRING_REGEX = /^\"(.*)\"\z/m
|
23
|
+
attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
|
24
|
+
|
25
|
+
def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
|
26
|
+
@keywords = (keywords ? keywords.uniq.compact : [])
|
27
|
+
@symbols = (symbols ? symbols.uniq.compact : [])
|
28
|
+
@string_literal_marker = string_literal_marker
|
29
|
+
@line_comment_marker = line_comment_marker
|
30
|
+
@comment_markers = comment_markers
|
28
31
|
end
|
29
32
|
|
30
33
|
def analyze(str)
|
@@ -34,39 +37,51 @@ module SrcLexer
|
|
34
37
|
|
35
38
|
def pop_token
|
36
39
|
token = @tokens.shift
|
37
|
-
if token.nil?
|
38
|
-
return END_TOKEN
|
39
|
-
end
|
40
|
+
return END_TOKEN if token.nil?
|
40
41
|
case token[0]
|
41
|
-
when
|
42
|
+
when NUMBER_REGEX
|
42
43
|
[:NUMBER, Token.new(token[0], token[1], token[2])]
|
43
|
-
when
|
44
|
+
when STRING_REGEX
|
44
45
|
[:STRING, Token.new(token[0], token[1], token[2])]
|
45
46
|
else
|
46
|
-
|
47
|
-
[id, Token.new(token[0], token[1], token[2])]
|
47
|
+
[is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
51
|
private
|
52
52
|
|
53
|
-
class
|
54
|
-
|
53
|
+
class PosInfo
|
54
|
+
attr_accessor :index, :line_no, :char_no
|
55
|
+
|
56
|
+
def initialize
|
57
|
+
@index = 0
|
58
|
+
@line_no = 1
|
59
|
+
@char_no = 1
|
60
|
+
end
|
61
|
+
end
|
55
62
|
|
63
|
+
class StringIterator
|
56
64
|
def initialize(str)
|
57
65
|
@str = str
|
58
|
-
@
|
59
|
-
@marked_pos =
|
66
|
+
@current_pos = PosInfo.new
|
67
|
+
@marked_pos = PosInfo.new
|
68
|
+
mark_clear()
|
69
|
+
end
|
70
|
+
|
71
|
+
def mark_clear
|
72
|
+
@marked_pos.index = -1
|
73
|
+
@marked_pos.line_no = 0
|
74
|
+
@marked_pos.char_no = 0
|
60
75
|
end
|
61
76
|
|
62
77
|
def mark_set
|
63
|
-
@marked_pos = @
|
78
|
+
@marked_pos = @current_pos.clone
|
64
79
|
end
|
65
80
|
|
66
81
|
def is(target_string)
|
67
82
|
return false if target_string.length.zero?
|
68
|
-
end_pos = (@index + target_string.length - 1)
|
69
|
-
@str[@index..end_pos] == target_string
|
83
|
+
end_pos = (@current_pos.index + target_string.length - 1)
|
84
|
+
@str[@current_pos.index..end_pos] == target_string
|
70
85
|
end
|
71
86
|
|
72
87
|
def is_in(target_list)
|
@@ -74,57 +89,51 @@ module SrcLexer
|
|
74
89
|
end
|
75
90
|
|
76
91
|
def move_next
|
77
|
-
@index
|
92
|
+
if /\n/.match @str[@current_pos.index]
|
93
|
+
@current_pos.line_no += 1
|
94
|
+
@current_pos.char_no = 1
|
95
|
+
else
|
96
|
+
@current_pos.char_no += 1
|
97
|
+
end
|
98
|
+
@current_pos.index += 1
|
78
99
|
end
|
79
100
|
|
80
101
|
def move_to_the_end_of_the_line
|
81
|
-
|
102
|
+
char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
|
103
|
+
@current_pos.index += char_count_to_the_end_of_the_line
|
104
|
+
@current_pos.char_no += char_count_to_the_end_of_the_line
|
82
105
|
end
|
83
106
|
|
84
107
|
def move_to(target)
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
108
|
+
char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
|
109
|
+
chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
|
110
|
+
@current_pos.index += char_count_to_target
|
111
|
+
match = /.*\n(.*)$/m.match(chopped_string)
|
112
|
+
p match[1].length if match
|
113
|
+
if match
|
114
|
+
@current_pos.char_no = match[1].length
|
115
|
+
else
|
116
|
+
@current_pos.char_no += char_count_to_target
|
117
|
+
end
|
118
|
+
@current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
|
95
119
|
end
|
96
120
|
|
97
|
-
def
|
98
|
-
@
|
121
|
+
def <(index)
|
122
|
+
@current_pos.index < index
|
99
123
|
end
|
100
124
|
|
101
125
|
def is_white_space
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
def info(pos)
|
106
|
-
[0, 0] if pos == 0
|
107
|
-
line_no, char_no = 1, 0
|
108
|
-
@str[0..pos].each_char do |char|
|
109
|
-
if /\n/.match(char)
|
110
|
-
line_no += 1
|
111
|
-
char_no = 0
|
112
|
-
else
|
113
|
-
char_no += 1
|
114
|
-
end
|
115
|
-
end
|
116
|
-
[line_no, char_no]
|
126
|
+
/\s/.match(@str[@current_pos.index])
|
117
127
|
end
|
118
128
|
|
119
129
|
def marked?
|
120
|
-
@marked_pos != -1
|
130
|
+
@marked_pos.index != -1
|
121
131
|
end
|
122
132
|
|
123
133
|
def shift
|
124
|
-
result = @str[@marked_pos..(@index - 1)]
|
125
|
-
|
126
|
-
|
127
|
-
return result, *line_no_and_char_no
|
134
|
+
result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
|
135
|
+
mark_clear()
|
136
|
+
return result
|
128
137
|
end
|
129
138
|
end
|
130
139
|
|
@@ -135,31 +144,33 @@ module SrcLexer
|
|
135
144
|
while iterator < @str.length do
|
136
145
|
if iterator.is_white_space then
|
137
146
|
@tokens.push iterator.shift if iterator.marked?
|
138
|
-
|
147
|
+
iterator.move_next
|
148
|
+
elsif @line_comment_marker && iterator.is(@line_comment_marker) then
|
139
149
|
@tokens.push iterator.shift if iterator.marked?
|
140
150
|
iterator.move_to_the_end_of_the_line
|
141
|
-
|
151
|
+
iterator.move_next
|
152
|
+
elsif @comment_markers && iterator.is(@comment_markers[0]) then
|
142
153
|
@tokens.push iterator.shift if iterator.marked?
|
143
154
|
iterator.move_to(@comment_markers[1])
|
144
|
-
|
155
|
+
iterator.move_next
|
156
|
+
elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
|
145
157
|
@tokens.push iterator.shift if iterator.marked?
|
146
158
|
iterator.mark_set
|
147
159
|
iterator.move_next
|
148
|
-
iterator.move_to(
|
160
|
+
iterator.move_to(@string_literal_marker[1])
|
149
161
|
iterator.move_next
|
150
162
|
@tokens.push iterator.shift
|
151
|
-
next
|
152
163
|
elsif iterator.is_in(@symbols) then
|
153
164
|
@tokens.push iterator.shift if iterator.marked?
|
154
|
-
|
155
|
-
@
|
156
|
-
|
165
|
+
iterator.mark_set
|
166
|
+
@symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
|
167
|
+
@tokens.push iterator.shift
|
157
168
|
elsif !iterator.marked? then
|
158
169
|
iterator.mark_set
|
170
|
+
else
|
171
|
+
iterator.move_next
|
159
172
|
end
|
160
|
-
iterator.move_next
|
161
173
|
end
|
162
|
-
|
163
174
|
@tokens.push iterator.shift if iterator.marked?
|
164
175
|
end
|
165
176
|
|
@@ -167,4 +178,47 @@ module SrcLexer
|
|
167
178
|
@keywords.include?(token) || @symbols.include?(token)
|
168
179
|
end
|
169
180
|
end
|
181
|
+
|
182
|
+
class CSharpLexer < Lexer
|
183
|
+
def initialize
|
184
|
+
super(
|
185
|
+
[ # C# keywords
|
186
|
+
'abstract', 'as', 'base', 'bool', 'break',
|
187
|
+
'byte', 'case', 'catch', 'char', 'checked',
|
188
|
+
'class', 'const', 'continue', 'decimal', 'default',
|
189
|
+
'delegate', 'do', 'double', 'else', 'enum',
|
190
|
+
'event', 'explicit', 'extern', 'false', 'finally',
|
191
|
+
'fixed', 'float', 'for', 'foreach', 'goto',
|
192
|
+
'if', 'implicit', 'in', 'int', 'interface',
|
193
|
+
'internal', 'is', 'lock', 'long', 'namespace',
|
194
|
+
'new', 'null', 'object', 'operator', 'out',
|
195
|
+
'override', 'params', 'private', 'protected', 'public',
|
196
|
+
'readonly', 'ref', 'return', 'sbyte', 'sealed',
|
197
|
+
'short', 'sizeof', 'stackalloc', 'static', 'string',
|
198
|
+
'struct', 'switch', 'this', 'throw', 'true',
|
199
|
+
'try', 'typeof', 'uint', 'ulong', 'unchecked',
|
200
|
+
'unsafe', 'ushort', 'using', 'virtual', 'void',
|
201
|
+
'volatile', 'while',
|
202
|
+
# C# context keywords
|
203
|
+
'add', 'alias', 'ascending', 'async', 'await',
|
204
|
+
'descending', 'dynamic', 'from', 'get', 'global',
|
205
|
+
'group', 'into', 'join', 'let', 'orderby',
|
206
|
+
'partial', 'remove', 'select', 'set', 'value',
|
207
|
+
'var', 'where', 'yield'
|
208
|
+
],
|
209
|
+
[
|
210
|
+
'<<=', '>>=', '<<', '>>', '<=',
|
211
|
+
'>=', '==', '!=', '&&', '||',
|
212
|
+
'??', '+=', '-=', '*=', '/=',
|
213
|
+
'%=', '&=', '|=', '^=', '=>',
|
214
|
+
'*', '/', '%', '+', '-',
|
215
|
+
'<', '>', '&', '^', '|',
|
216
|
+
'?', ':', '=', '{', '}',
|
217
|
+
'(', ')', '[', ']', ';'
|
218
|
+
],
|
219
|
+
['"', '"'], # comment markers
|
220
|
+
'//', # line comment marker
|
221
|
+
['/*', '*/']) # multi line comment markers
|
222
|
+
end
|
223
|
+
end
|
170
224
|
end
|
data/spec/src_lexer_spec.rb
CHANGED
@@ -9,7 +9,7 @@ end
|
|
9
9
|
|
10
10
|
describe SrcLexer::Lexer, 'with empty string' do
|
11
11
|
it 'should return Lexer::END_TOKEN' do
|
12
|
-
sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
|
12
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
|
13
13
|
sut.analyze('')
|
14
14
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
15
15
|
end
|
@@ -17,7 +17,7 @@ end
|
|
17
17
|
|
18
18
|
describe SrcLexer::Lexer, 'with keyword definitions' do
|
19
19
|
it 'should recognize keywords' do
|
20
|
-
sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil)
|
20
|
+
sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil, nil)
|
21
21
|
sut.analyze('struct structenum enum')
|
22
22
|
sut.pop_token.should == ['struct', SrcLexer::Token.new('struct', 1, 1)]
|
23
23
|
sut.pop_token.should == [:IDENT, SrcLexer::Token.new('structenum', 1, 8)]
|
@@ -25,18 +25,18 @@ describe SrcLexer::Lexer, 'with keyword definitions' do
|
|
25
25
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
26
26
|
end
|
27
27
|
it 'should reduce keyword duplication' do
|
28
|
-
sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil)
|
28
|
+
sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil, nil)
|
29
29
|
sut.keywords.should == ['struct']
|
30
30
|
end
|
31
31
|
it 'should ignore nil keyword' do
|
32
|
-
sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil)
|
32
|
+
sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil, nil)
|
33
33
|
sut.keywords.should == ['struct', 'enum']
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
describe SrcLexer::Lexer, 'with symbol definitions' do
|
38
38
|
it 'should recognize symbols' do
|
39
|
-
sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil)
|
39
|
+
sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil, nil)
|
40
40
|
sut.analyze('.. A ,')
|
41
41
|
sut.pop_token.should == ['..', SrcLexer::Token.new('..', 1, 1)]
|
42
42
|
sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 4)]
|
@@ -44,7 +44,7 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
|
|
44
44
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
45
45
|
end
|
46
46
|
it 'should recognize symbols(,) if continues like "A,B"' do
|
47
|
-
sut = SrcLexer::Lexer.new(nil, [','], nil, nil)
|
47
|
+
sut = SrcLexer::Lexer.new(nil, [','], nil, nil, nil)
|
48
48
|
sut.analyze('A,B')
|
49
49
|
sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
|
50
50
|
sut.pop_token.should == [',', SrcLexer::Token.new(',', 1, 2)]
|
@@ -52,18 +52,18 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
|
|
52
52
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
53
53
|
end
|
54
54
|
it 'should reduce symbol duplication' do
|
55
|
-
sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil)
|
55
|
+
sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil, nil)
|
56
56
|
sut.symbols.should == [',']
|
57
57
|
end
|
58
58
|
it 'should ignore nil keyword' do
|
59
|
-
sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil)
|
59
|
+
sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil, nil)
|
60
60
|
sut.symbols.should == ['{', '}']
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
64
|
describe SrcLexer::Lexer, 'with line comment marker' do
|
65
65
|
it 'should recognize line comment' do
|
66
|
-
sut = SrcLexer::Lexer.new(nil, nil, '//', nil)
|
66
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, '//', nil)
|
67
67
|
sut.analyze(<<-'EOS')
|
68
68
|
A//comment
|
69
69
|
B
|
@@ -73,7 +73,7 @@ describe SrcLexer::Lexer, 'with line comment marker' do
|
|
73
73
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
74
74
|
end
|
75
75
|
it 'should recognize multi line comment' do
|
76
|
-
sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
|
76
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, '//', ['/*', '*/'])
|
77
77
|
sut.analyze(<<-'EOS')
|
78
78
|
A/*comment
|
79
79
|
B//still in comment*/C
|
@@ -86,14 +86,14 @@ end
|
|
86
86
|
|
87
87
|
describe SrcLexer::Lexer do
|
88
88
|
it 'should analyze number string' do
|
89
|
-
sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
|
89
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
|
90
90
|
sut.analyze('9 1.5')
|
91
91
|
sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("9", 1, 1,)]
|
92
92
|
sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("1.5", 1, 3)]
|
93
93
|
sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
|
94
94
|
end
|
95
95
|
it 'should analyze string literal' do
|
96
|
-
sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
|
96
|
+
sut = SrcLexer::Lexer.new(nil, nil, ['"', '"'], '//', ['/*', '*/'])
|
97
97
|
sut.analyze('A"//"B"/**/"C')
|
98
98
|
sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
|
99
99
|
sut.pop_token.should == [:STRING, SrcLexer::Token.new('"//"', 1, 2)]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: src_lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kkikzk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|