src_lexer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWNmNzQ2N2JjN2UyNTVhM2UwZTE3ZmVkMWVlZjE4OTJiZGRkNGUwNg==
4
+ N2I2ZmY3ZjlhNjU3MTBiYzE1ZmMzMTYwMjk0MzEwNTU3MmE3NWVlYw==
5
5
  data.tar.gz: !binary |-
6
- ZWFmYTU0NmVjYTkxZWI5NjNkNTQ3Zjc4ZjNlMTkzNjQwMTFkOWI2Mg==
6
+ ZmQ2YjRkOTVkODhmODEyZDA4OWY2Y2JhN2M5Yzc3YjkxNzJjYmMzNQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MGY0M2JlNWZkNjY3ZTEzNjk5M2E4OTZlMjBhYmU4Mzg5OGI3ZGZjNWQyNTZm
10
- NzliYjU2OGFmNDk2OWJkZDY5ZGJlMmYzMTViMjhkYmMwMDk2NmUyYzBhYjdl
11
- ZDFmNmExOGZkNTA2YTY2NDVjOGI3YzVkZjlhYzI4ZDU1YzViMzI=
9
+ MTc4NmI5ZTM4MmZkNThlM2FiY2VkMDRiNTRjOWIzMzEzZjZlNzI2NGQ2NzU5
10
+ MTM5ZGQ3MTY5NmY5MzFiYjQ3NzNkNzhiMDY3NmM0MmY4ZDVlNDlmM2RiZmQy
11
+ NDdhYzk2YjI5MGU2MGQ3Yzc1YjQyYzVhYzE4MTJiMzAyYTI0Mzc=
12
12
  data.tar.gz: !binary |-
13
- ZmE5YWQ3YTViY2FmZWE0YTRlMWQxODhlZThkMDA0ZWExMDNiODQwNGI0ZTQ1
14
- ZDRkYzAyZTczZDhiNGJiZjVjYWNhMDI5M2Y5NDBlNmI3ZjBkZTUxODlkMzM4
15
- MDNiNGI3ZjBiZjFlM2ZmMmY4MWYxZTVjM2ZhMjEzNDIwNGM1ZGM=
13
+ NTcyOWE4NjlmODE2ODMxMWEwMzlmZTJmNmZkNWVhYTcwNDEyODIzZjkwYTE2
14
+ MDYyNjk5MzkxZGVkZTEzZDkzMmRhOTNlZjE2MjA3Y2UzNTkzNTc1N2JiZmRm
15
+ NTgxNjQ1MGNmOWQxZmE4ODRkYjAxODVlNTk3ZTQ4ZjNjNTJjODg=
@@ -1,3 +1,3 @@
1
1
  module SrcLexer
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/src_lexer.rb CHANGED
@@ -18,13 +18,16 @@ module SrcLexer
18
18
 
19
19
  class Lexer
20
20
  END_TOKEN = [false, nil]
21
- attr_reader :keywords, :symbols, :line_comment_marker, :comment_markers, :tokens, :str
22
-
23
- def initialize(keywords, symbols, line_comment_marker, comment_marker)
24
- @keywords = ((keywords.nil?) ? [] : keywords.uniq.compact)
25
- @symbols = ((symbols.nil?) ? [] : symbols.uniq.compact)
26
- @line_comment_marker = ((line_comment_marker.nil?) ? '' : line_comment_marker)
27
- @comment_markers = ((comment_marker.nil?) ? ['', ''] : comment_marker)
21
+ NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
22
+ STRING_REGEX = /^\"(.*)\"\z/m
23
+ attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
24
+
25
+ def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
26
+ @keywords = (keywords ? keywords.uniq.compact : [])
27
+ @symbols = (symbols ? symbols.uniq.compact : [])
28
+ @string_literal_marker = string_literal_marker
29
+ @line_comment_marker = line_comment_marker
30
+ @comment_markers = comment_markers
28
31
  end
29
32
 
30
33
  def analyze(str)
@@ -34,39 +37,51 @@ module SrcLexer
34
37
 
35
38
  def pop_token
36
39
  token = @tokens.shift
37
- if token.nil? then
38
- return END_TOKEN
39
- end
40
+ return END_TOKEN if token.nil?
40
41
  case token[0]
41
- when /^[\d]+[\.]?[\d]*\z/
42
+ when NUMBER_REGEX
42
43
  [:NUMBER, Token.new(token[0], token[1], token[2])]
43
- when /^\"(.*)\"\z/m
44
+ when STRING_REGEX
44
45
  [:STRING, Token.new(token[0], token[1], token[2])]
45
46
  else
46
- id = is_reserved?(token[0]) ? token[0] : :IDENT
47
- [id, Token.new(token[0], token[1], token[2])]
47
+ [is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
48
48
  end
49
49
  end
50
50
 
51
51
  private
52
52
 
53
- class StringIterator
54
- attr_reader :index
53
+ class PosInfo
54
+ attr_accessor :index, :line_no, :char_no
55
+
56
+ def initialize
57
+ @index = 0
58
+ @line_no = 1
59
+ @char_no = 1
60
+ end
61
+ end
55
62
 
63
+ class StringIterator
56
64
  def initialize(str)
57
65
  @str = str
58
- @index = 0
59
- @marked_pos = -1
66
+ @current_pos = PosInfo.new
67
+ @marked_pos = PosInfo.new
68
+ mark_clear()
69
+ end
70
+
71
+ def mark_clear
72
+ @marked_pos.index = -1
73
+ @marked_pos.line_no = 0
74
+ @marked_pos.char_no = 0
60
75
  end
61
76
 
62
77
  def mark_set
63
- @marked_pos = @index
78
+ @marked_pos = @current_pos.clone
64
79
  end
65
80
 
66
81
  def is(target_string)
67
82
  return false if target_string.length.zero?
68
- end_pos = (@index + target_string.length - 1)
69
- @str[@index..end_pos] == target_string
83
+ end_pos = (@current_pos.index + target_string.length - 1)
84
+ @str[@current_pos.index..end_pos] == target_string
70
85
  end
71
86
 
72
87
  def is_in(target_list)
@@ -74,57 +89,51 @@ module SrcLexer
74
89
  end
75
90
 
76
91
  def move_next
77
- @index += 1
92
+ if /\n/.match @str[@current_pos.index]
93
+ @current_pos.line_no += 1
94
+ @current_pos.char_no = 1
95
+ else
96
+ @current_pos.char_no += 1
97
+ end
98
+ @current_pos.index += 1
78
99
  end
79
100
 
80
101
  def move_to_the_end_of_the_line
81
- @index += (@str[@index..-1] =~ /$/) - 1
102
+ char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
103
+ @current_pos.index += char_count_to_the_end_of_the_line
104
+ @current_pos.char_no += char_count_to_the_end_of_the_line
82
105
  end
83
106
 
84
107
  def move_to(target)
85
- esceped_target = Regexp.escape(target)
86
- @index += (@str[@index..-1] =~ /#{esceped_target}/m) + target.length - 1
87
- end
88
-
89
- def [](range)
90
- @str[range]
91
- end
92
-
93
- def <(pos)
94
- @index < pos
108
+ char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
109
+ chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
110
+ @current_pos.index += char_count_to_target
111
+ match = /.*\n(.*)$/m.match(chopped_string)
112
+ p match[1].length if match
113
+ if match
114
+ @current_pos.char_no = match[1].length
115
+ else
116
+ @current_pos.char_no += char_count_to_target
117
+ end
118
+ @current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
95
119
  end
96
120
 
97
- def char
98
- @str[@index]
121
+ def <(index)
122
+ @current_pos.index < index
99
123
  end
100
124
 
101
125
  def is_white_space
102
- /[\s]/.match(char)
103
- end
104
-
105
- def info(pos)
106
- [0, 0] if pos == 0
107
- line_no, char_no = 1, 0
108
- @str[0..pos].each_char do |char|
109
- if /\n/.match(char)
110
- line_no += 1
111
- char_no = 0
112
- else
113
- char_no += 1
114
- end
115
- end
116
- [line_no, char_no]
126
+ /\s/.match(@str[@current_pos.index])
117
127
  end
118
128
 
119
129
  def marked?
120
- @marked_pos != -1
130
+ @marked_pos.index != -1
121
131
  end
122
132
 
123
133
  def shift
124
- result = @str[@marked_pos..(@index - 1)]
125
- line_no_and_char_no = info(@marked_pos)
126
- @marked_pos = -1
127
- return result, *line_no_and_char_no
134
+ result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
135
+ mark_clear()
136
+ return result
128
137
  end
129
138
  end
130
139
 
@@ -135,31 +144,33 @@ module SrcLexer
135
144
  while iterator < @str.length do
136
145
  if iterator.is_white_space then
137
146
  @tokens.push iterator.shift if iterator.marked?
138
- elsif iterator.is(@line_comment_marker) then
147
+ iterator.move_next
148
+ elsif @line_comment_marker && iterator.is(@line_comment_marker) then
139
149
  @tokens.push iterator.shift if iterator.marked?
140
150
  iterator.move_to_the_end_of_the_line
141
- elsif iterator.is(@comment_markers[0]) then
151
+ iterator.move_next
152
+ elsif @comment_markers && iterator.is(@comment_markers[0]) then
142
153
  @tokens.push iterator.shift if iterator.marked?
143
154
  iterator.move_to(@comment_markers[1])
144
- elsif iterator.is('"') then
155
+ iterator.move_next
156
+ elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
145
157
  @tokens.push iterator.shift if iterator.marked?
146
158
  iterator.mark_set
147
159
  iterator.move_next
148
- iterator.move_to('"')
160
+ iterator.move_to(@string_literal_marker[1])
149
161
  iterator.move_next
150
162
  @tokens.push iterator.shift
151
- next
152
163
  elsif iterator.is_in(@symbols) then
153
164
  @tokens.push iterator.shift if iterator.marked?
154
- symbol = @symbols.find { |symbol| iterator.is(symbol) }
155
- @tokens.push [iterator[iterator.index..(iterator.index + symbol.length - 1)], *iterator.info(iterator.index)]
156
- (symbol.length - 1).times { iterator.move_next }
165
+ iterator.mark_set
166
+ @symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
167
+ @tokens.push iterator.shift
157
168
  elsif !iterator.marked? then
158
169
  iterator.mark_set
170
+ else
171
+ iterator.move_next
159
172
  end
160
- iterator.move_next
161
173
  end
162
-
163
174
  @tokens.push iterator.shift if iterator.marked?
164
175
  end
165
176
 
@@ -167,4 +178,47 @@ module SrcLexer
167
178
  @keywords.include?(token) || @symbols.include?(token)
168
179
  end
169
180
  end
181
+
182
+ class CSharpLexer < Lexer
183
+ def initialize
184
+ super(
185
+ [ # C# keywords
186
+ 'abstract', 'as', 'base', 'bool', 'break',
187
+ 'byte', 'case', 'catch', 'char', 'checked',
188
+ 'class', 'const', 'continue', 'decimal', 'default',
189
+ 'delegate', 'do', 'double', 'else', 'enum',
190
+ 'event', 'explicit', 'extern', 'false', 'finally',
191
+ 'fixed', 'float', 'for', 'foreach', 'goto',
192
+ 'if', 'implicit', 'in', 'int', 'interface',
193
+ 'internal', 'is', 'lock', 'long', 'namespace',
194
+ 'new', 'null', 'object', 'operator', 'out',
195
+ 'override', 'params', 'private', 'protected', 'public',
196
+ 'readonly', 'ref', 'return', 'sbyte', 'sealed',
197
+ 'short', 'sizeof', 'stackalloc', 'static', 'string',
198
+ 'struct', 'switch', 'this', 'throw', 'true',
199
+ 'try', 'typeof', 'uint', 'ulong', 'unchecked',
200
+ 'unsafe', 'ushort', 'using', 'virtual', 'void',
201
+ 'volatile', 'while',
202
+ # C# context keywords
203
+ 'add', 'alias', 'ascending', 'async', 'await',
204
+ 'descending', 'dynamic', 'from', 'get', 'global',
205
+ 'group', 'into', 'join', 'let', 'orderby',
206
+ 'partial', 'remove', 'select', 'set', 'value',
207
+ 'var', 'where', 'yield'
208
+ ],
209
+ [
210
+ '<<=', '>>=', '<<', '>>', '<=',
211
+ '>=', '==', '!=', '&&', '||',
212
+ '??', '+=', '-=', '*=', '/=',
213
+ '%=', '&=', '|=', '^=', '=>',
214
+ '*', '/', '%', '+', '-',
215
+ '<', '>', '&', '^', '|',
216
+ '?', ':', '=', '{', '}',
217
+ '(', ')', '[', ']', ';'
218
+ ],
219
+ ['"', '"'], # string literal markers (open, close)
220
+ '//', # line comment marker
221
+ ['/*', '*/']) # multi line comment markers
222
+ end
223
+ end
170
224
  end
@@ -9,7 +9,7 @@ end
9
9
 
10
10
  describe SrcLexer::Lexer, 'with empty string' do
11
11
  it 'should return Lexer::END_TOKEN' do
12
- sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
12
+ sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
13
13
  sut.analyze('')
14
14
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
15
15
  end
@@ -17,7 +17,7 @@ end
17
17
 
18
18
  describe SrcLexer::Lexer, 'with keyword definitions' do
19
19
  it 'should recognize keywords' do
20
- sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil)
20
+ sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil, nil)
21
21
  sut.analyze('struct structenum enum')
22
22
  sut.pop_token.should == ['struct', SrcLexer::Token.new('struct', 1, 1)]
23
23
  sut.pop_token.should == [:IDENT, SrcLexer::Token.new('structenum', 1, 8)]
@@ -25,18 +25,18 @@ describe SrcLexer::Lexer, 'with keyword definitions' do
25
25
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
26
26
  end
27
27
  it 'should reduce keyword duplication' do
28
- sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil)
28
+ sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil, nil)
29
29
  sut.keywords.should == ['struct']
30
30
  end
31
31
  it 'should ignore nil keyword' do
32
- sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil)
32
+ sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil, nil)
33
33
  sut.keywords.should == ['struct', 'enum']
34
34
  end
35
35
  end
36
36
 
37
37
  describe SrcLexer::Lexer, 'with symbol definitions' do
38
38
  it 'should recognize symbols' do
39
- sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil)
39
+ sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil, nil)
40
40
  sut.analyze('.. A ,')
41
41
  sut.pop_token.should == ['..', SrcLexer::Token.new('..', 1, 1)]
42
42
  sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 4)]
@@ -44,7 +44,7 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
44
44
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
45
45
  end
46
46
  it 'should recognize symbols(,) if continues like "A,B"' do
47
- sut = SrcLexer::Lexer.new(nil, [','], nil, nil)
47
+ sut = SrcLexer::Lexer.new(nil, [','], nil, nil, nil)
48
48
  sut.analyze('A,B')
49
49
  sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
50
50
  sut.pop_token.should == [',', SrcLexer::Token.new(',', 1, 2)]
@@ -52,18 +52,18 @@ describe SrcLexer::Lexer, 'with symbol definitions' do
52
52
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
53
53
  end
54
54
  it 'should reduce symbol duplication' do
55
- sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil)
55
+ sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil, nil)
56
56
  sut.symbols.should == [',']
57
57
  end
58
58
  it 'should ignore nil keyword' do
59
- sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil)
59
+ sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil, nil)
60
60
  sut.symbols.should == ['{', '}']
61
61
  end
62
62
  end
63
63
 
64
64
  describe SrcLexer::Lexer, 'with line comment marker' do
65
65
  it 'should recognize line comment' do
66
- sut = SrcLexer::Lexer.new(nil, nil, '//', nil)
66
+ sut = SrcLexer::Lexer.new(nil, nil, nil, '//', nil)
67
67
  sut.analyze(<<-'EOS')
68
68
  A//comment
69
69
  B
@@ -73,7 +73,7 @@ describe SrcLexer::Lexer, 'with line comment marker' do
73
73
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
74
74
  end
75
75
  it 'should recognize multi line comment' do
76
- sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
76
+ sut = SrcLexer::Lexer.new(nil, nil, nil, '//', ['/*', '*/'])
77
77
  sut.analyze(<<-'EOS')
78
78
  A/*comment
79
79
  B//still in comment*/C
@@ -86,14 +86,14 @@ end
86
86
 
87
87
  describe SrcLexer::Lexer do
88
88
  it 'should analyze number string' do
89
- sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
89
+ sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
90
90
  sut.analyze('9 1.5')
91
91
  sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("9", 1, 1,)]
92
92
  sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("1.5", 1, 3)]
93
93
  sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
94
94
  end
95
95
  it 'should analyze string literal' do
96
- sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
96
+ sut = SrcLexer::Lexer.new(nil, nil, ['"', '"'], '//', ['/*', '*/'])
97
97
  sut.analyze('A"//"B"/**/"C')
98
98
  sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
99
99
  sut.pop_token.should == [:STRING, SrcLexer::Token.new('"//"', 1, 2)]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: src_lexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kkikzk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-27 00:00:00.000000000 Z
11
+ date: 2014-04-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler