src_lexer 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.gitignore +17 -17
- data/.rspec +2 -2
- data/.travis.yml +3 -3
- data/Gemfile +4 -4
- data/LICENSE.txt +22 -22
- data/README.md +84 -84
- data/Rakefile +6 -6
- data/bin/src_lexer +3 -3
- data/lib/src_lexer/version.rb +3 -3
- data/lib/src_lexer.rb +227 -227
- data/spec/spec_helper.rb +3 -3
- data/spec/src_lexer_spec.rb +115 -105
- data/src_lexer.gemspec +24 -24
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NjUyYWQ1MmYxNTM0ODdmM2JlNjA0MTJjZGVmZWI2NjMyZmFhNDlkMg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NjZjYTZlZDgyOTFlNzZjZDE3Y2NjMDY5ZmQ5ZWY2NTJhN2EwODg1Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ODI2N2RkMGU1MWFiN2Y5MzY0MWJmNTRiNTMzNjA0ODdkOTk4NjczMTc3NzEy
|
10
|
+
YzM2NzVlNGVlMjg5MDJjOWZjMGVjYzc3OTY5YTVhNDIwYTUwYjBiNDlmN2E5
|
11
|
+
ZjU2OWIxY2RhZDIwYmIxNzc5ZDYwMzVkYTI4MWY3OGY2YThlZjc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YjFiOWQ3MmExZTRjM2IyODI0MzAwZjE2YWFiM2JhZjlmY2FjN2FiMDkwMTYx
|
14
|
+
MWU3MTk4YjMyMjRhMDdjMjUxZThkODVmYjZhZmE0MjNkODFlMWM2MTUyMDFi
|
15
|
+
Y2ZmNWNlNzg5OTIxM2FjMmQyOTQ4YjI1ZmU4OGVlYWQxMjU5YzY=
|
data/.gitignore
CHANGED
@@ -1,17 +1,17 @@
|
|
1
|
-
*.gem
|
2
|
-
*.rbc
|
3
|
-
.bundle
|
4
|
-
.config
|
5
|
-
.yardoc
|
6
|
-
Gemfile.lock
|
7
|
-
InstalledFiles
|
8
|
-
_yardoc
|
9
|
-
coverage
|
10
|
-
doc/
|
11
|
-
lib/bundler/man
|
12
|
-
pkg
|
13
|
-
rdoc
|
14
|
-
spec/reports
|
15
|
-
test/tmp
|
16
|
-
test/version_tmp
|
17
|
-
tmp
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
data/.rspec
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
--format documentation
|
2
|
-
--color
|
1
|
+
--format documentation
|
2
|
+
--color
|
data/.travis.yml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
language: ruby
|
2
|
-
rvm:
|
3
|
-
- 1.9.3
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.3
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in src_lexer.gemspec
|
4
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in src_lexer.gemspec
|
4
|
+
gemspec
|
data/LICENSE.txt
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
Copyright (c) 2014 kkikzk
|
2
|
-
|
3
|
-
MIT License
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
a copy of this software and associated documentation files (the
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be
|
14
|
-
included in all copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
1
|
+
Copyright (c) 2014 kkikzk
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,84 +1,84 @@
|
|
1
|
-
# SrcLexer
|
2
|
-
|
3
|
-
SrcLexer is a simple source file lexer.
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'src_lexer'
|
10
|
-
|
11
|
-
And then execute:
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
16
|
-
|
17
|
-
$ gem install src_lexer
|
18
|
-
|
19
|
-
## Usage
|
20
|
-
|
21
|
-
lexer = SrcLexer::Lexer.new(
|
22
|
-
['struct', 'enum', 'true', 'false'], # keywords
|
23
|
-
['{', '}', '(', ')', ',', '==', '=', ';'], # symbols
|
24
|
-
['"', '"'], # string literal markers
|
25
|
-
'//', # line comment marker
|
26
|
-
['/*', '*/'] # multi line comment markers
|
27
|
-
)
|
28
|
-
|
29
|
-
lexer.analyze(<<-'EOS')
|
30
|
-
// comment
|
31
|
-
enum ID {
|
32
|
-
First = 1,
|
33
|
-
Second = 1.5
|
34
|
-
}
|
35
|
-
/* comment
|
36
|
-
againe */
|
37
|
-
struct Data {
|
38
|
-
string name = "This is a name.";
|
39
|
-
ID id;
|
40
|
-
}
|
41
|
-
bool b = (true==false);
|
42
|
-
EOS
|
43
|
-
|
44
|
-
lexer.pop_token # => ['enum', SrcLexer::Token.new('enum', 2, 3)]
|
45
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('ID', 2, 8)]
|
46
|
-
lexer.pop_token # => ['{', SrcLexer::Token.new('{', 2, 11)]
|
47
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('First', 3, 5)]
|
48
|
-
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 3, 11)]
|
49
|
-
lexer.pop_token # => [:NUMBER, SrcLexer::Token.new('1', 3, 13)]
|
50
|
-
lexer.pop_token # => [',', SrcLexer::Token.new(',', 3, 14)]
|
51
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('Second', 4, 5)]
|
52
|
-
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 4, 12)]
|
53
|
-
lexer.pop_token # => [:NUMBER, SrcLexer::Token.new('1.5', 4, 14)]
|
54
|
-
lexer.pop_token # => ['}', SrcLexer::Token.new('}', 5, 3)]
|
55
|
-
lexer.pop_token # => ['struct', SrcLexer::Token.new('struct', 8, 3)]
|
56
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('Data', 8, 10)]
|
57
|
-
lexer.pop_token # => ['{', SrcLexer::Token.new('{', 8, 15)]
|
58
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('string', 9, 5)]
|
59
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('name', 9, 12)]
|
60
|
-
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 9, 17)]
|
61
|
-
lexer.pop_token # => [:STRING, SrcLexer::Token.new('"This is a name."', 9, 19)]
|
62
|
-
lexer.pop_token # => [';', SrcLexer::Token.new(';', 9, 36)]
|
63
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('ID', 10, 5)]
|
64
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('id', 10, 8)]
|
65
|
-
lexer.pop_token # => [';', SrcLexer::Token.new(';', 10, 10)]
|
66
|
-
lexer.pop_token # => ['}', SrcLexer::Token.new('}', 11, 3)]
|
67
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('bool', 12, 3)]
|
68
|
-
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('b', 12, 8)]
|
69
|
-
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 12, 10)]
|
70
|
-
lexer.pop_token # => ['(', SrcLexer::Token.new('(', 12, 12)]
|
71
|
-
lexer.pop_token # => ['true', SrcLexer::Token.new('true', 12, 13)]
|
72
|
-
lexer.pop_token # => ['==', SrcLexer::Token.new('==', 12, 17)]
|
73
|
-
lexer.pop_token # => ['false', SrcLexer::Token.new('false', 12, 19)]
|
74
|
-
lexer.pop_token # => [')', SrcLexer::Token.new(')', 12, 24)]
|
75
|
-
lexer.pop_token # => [';', SrcLexer::Token.new(';', 12, 25)]
|
76
|
-
lexer.pop_token # => SrcLexer::Lexer::END_TOKEN
|
77
|
-
|
78
|
-
## Contributing
|
79
|
-
|
80
|
-
1. Fork it ( http://github.com/<my-github-username>/src_lexer/fork )
|
81
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
82
|
-
3. Commit your changes (`git commit -am 'Add some feature'`)
|
83
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
84
|
-
5. Create new Pull Request
|
1
|
+
# SrcLexer
|
2
|
+
|
3
|
+
SrcLexer is a simple source file lexer.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'src_lexer'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install src_lexer
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
lexer = SrcLexer::Lexer.new(
|
22
|
+
['struct', 'enum', 'true', 'false'], # keywords
|
23
|
+
['{', '}', '(', ')', ',', '==', '=', ';'], # symbols
|
24
|
+
['"', '"'], # string literal markers
|
25
|
+
'//', # line comment marker
|
26
|
+
['/*', '*/'] # multi line comment markers
|
27
|
+
)
|
28
|
+
|
29
|
+
lexer.analyze(<<-'EOS')
|
30
|
+
// comment
|
31
|
+
enum ID {
|
32
|
+
First = 1,
|
33
|
+
Second = 1.5
|
34
|
+
}
|
35
|
+
/* comment
|
36
|
+
againe */
|
37
|
+
struct Data {
|
38
|
+
string name = "This is a name.";
|
39
|
+
ID id;
|
40
|
+
}
|
41
|
+
bool b = (true==false);
|
42
|
+
EOS
|
43
|
+
|
44
|
+
lexer.pop_token # => ['enum', SrcLexer::Token.new('enum', 2, 3)]
|
45
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('ID', 2, 8)]
|
46
|
+
lexer.pop_token # => ['{', SrcLexer::Token.new('{', 2, 11)]
|
47
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('First', 3, 5)]
|
48
|
+
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 3, 11)]
|
49
|
+
lexer.pop_token # => [:NUMBER, SrcLexer::Token.new('1', 3, 13)]
|
50
|
+
lexer.pop_token # => [',', SrcLexer::Token.new(',', 3, 14)]
|
51
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('Second', 4, 5)]
|
52
|
+
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 4, 12)]
|
53
|
+
lexer.pop_token # => [:NUMBER, SrcLexer::Token.new('1.5', 4, 14)]
|
54
|
+
lexer.pop_token # => ['}', SrcLexer::Token.new('}', 5, 3)]
|
55
|
+
lexer.pop_token # => ['struct', SrcLexer::Token.new('struct', 8, 3)]
|
56
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('Data', 8, 10)]
|
57
|
+
lexer.pop_token # => ['{', SrcLexer::Token.new('{', 8, 15)]
|
58
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('string', 9, 5)]
|
59
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('name', 9, 12)]
|
60
|
+
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 9, 17)]
|
61
|
+
lexer.pop_token # => [:STRING, SrcLexer::Token.new('"This is a name."', 9, 19)]
|
62
|
+
lexer.pop_token # => [';', SrcLexer::Token.new(';', 9, 36)]
|
63
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('ID', 10, 5)]
|
64
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('id', 10, 8)]
|
65
|
+
lexer.pop_token # => [';', SrcLexer::Token.new(';', 10, 10)]
|
66
|
+
lexer.pop_token # => ['}', SrcLexer::Token.new('}', 11, 3)]
|
67
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('bool', 12, 3)]
|
68
|
+
lexer.pop_token # => [:IDENT, SrcLexer::Token.new('b', 12, 8)]
|
69
|
+
lexer.pop_token # => ['=', SrcLexer::Token.new('=', 12, 10)]
|
70
|
+
lexer.pop_token # => ['(', SrcLexer::Token.new('(', 12, 12)]
|
71
|
+
lexer.pop_token # => ['true', SrcLexer::Token.new('true', 12, 13)]
|
72
|
+
lexer.pop_token # => ['==', SrcLexer::Token.new('==', 12, 17)]
|
73
|
+
lexer.pop_token # => ['false', SrcLexer::Token.new('false', 12, 19)]
|
74
|
+
lexer.pop_token # => [')', SrcLexer::Token.new(')', 12, 24)]
|
75
|
+
lexer.pop_token # => [';', SrcLexer::Token.new(';', 12, 25)]
|
76
|
+
lexer.pop_token # => SrcLexer::Lexer::END_TOKEN
|
77
|
+
|
78
|
+
## Contributing
|
79
|
+
|
80
|
+
1. Fork it ( http://github.com/<my-github-username>/src_lexer/fork )
|
81
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
82
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
83
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
84
|
+
5. Create new Pull Request
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require "bundler/gem_tasks"
|
2
|
-
require "rspec/core/rake_task"
|
3
|
-
|
4
|
-
RSpec::Core::RakeTask.new(:spec)
|
5
|
-
|
6
|
-
task :default => :spec
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
|
6
|
+
task :default => :spec
|
data/bin/src_lexer
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'src_lexer'
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'src_lexer'
|
data/lib/src_lexer/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module SrcLexer
|
2
|
-
VERSION = "1.0.2"
|
3
|
-
end
|
1
|
+
module SrcLexer
|
2
|
+
VERSION = "1.0.3"
|
3
|
+
end
|
data/lib/src_lexer.rb
CHANGED
@@ -1,227 +1,227 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
require "src_lexer/version"
|
3
|
-
|
4
|
-
module SrcLexer
|
5
|
-
class Token
|
6
|
-
attr_reader :str, :line_no, :char_no
|
7
|
-
|
8
|
-
def initialize(str, line_no, char_no)
|
9
|
-
@str = str
|
10
|
-
@line_no = line_no
|
11
|
-
@char_no = char_no
|
12
|
-
end
|
13
|
-
|
14
|
-
def ==(other_object)
|
15
|
-
@str == other_object.str && @line_no == other_object.line_no && @char_no == other_object.char_no
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
class Lexer
|
20
|
-
END_TOKEN = [false, nil]
|
21
|
-
NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
|
22
|
-
STRING_REGEX = /^\"(.*)\"\z/m
|
23
|
-
attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
|
24
|
-
|
25
|
-
def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
|
26
|
-
@keywords = (keywords ? keywords.uniq.compact : [])
|
27
|
-
@symbols = (symbols ? symbols.uniq.compact : [])
|
28
|
-
@string_literal_marker = string_literal_marker
|
29
|
-
@line_comment_marker = line_comment_marker
|
30
|
-
@comment_markers = comment_markers
|
31
|
-
end
|
32
|
-
|
33
|
-
def analyze(str)
|
34
|
-
@str = str
|
35
|
-
tokenize
|
36
|
-
end
|
37
|
-
|
38
|
-
def pop_token
|
39
|
-
token = @tokens.shift
|
40
|
-
return END_TOKEN if token.nil?
|
41
|
-
case token[0]
|
42
|
-
when NUMBER_REGEX
|
43
|
-
[:NUMBER, Token.new(token[0], token[1], token[2])]
|
44
|
-
when STRING_REGEX
|
45
|
-
[:STRING, Token.new(token[0], token[1], token[2])]
|
46
|
-
else
|
47
|
-
[is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
class PosInfo
|
54
|
-
attr_accessor :index, :line_no, :char_no
|
55
|
-
|
56
|
-
def initialize
|
57
|
-
@index = 0
|
58
|
-
@line_no = 1
|
59
|
-
@char_no = 1
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
class StringIterator
|
64
|
-
def initialize(str)
|
65
|
-
@str = str
|
66
|
-
@current_pos = PosInfo.new
|
67
|
-
@marked_pos = PosInfo.new
|
68
|
-
mark_clear()
|
69
|
-
end
|
70
|
-
|
71
|
-
def mark_clear
|
72
|
-
@marked_pos.index = -1
|
73
|
-
@marked_pos.line_no = 0
|
74
|
-
@marked_pos.char_no = 0
|
75
|
-
end
|
76
|
-
|
77
|
-
def mark_set
|
78
|
-
@marked_pos = @current_pos.clone
|
79
|
-
end
|
80
|
-
|
81
|
-
def is(target_string)
|
82
|
-
return false if target_string.length.zero?
|
83
|
-
end_pos = (@current_pos.index + target_string.length - 1)
|
84
|
-
@str[@current_pos.index..end_pos] == target_string
|
85
|
-
end
|
86
|
-
|
87
|
-
def is_in(target_list)
|
88
|
-
target_list.find { |target| is(target) } != nil
|
89
|
-
end
|
90
|
-
|
91
|
-
def move_next
|
92
|
-
if /\n/.match @str[@current_pos.index]
|
93
|
-
@current_pos.line_no += 1
|
94
|
-
@current_pos.char_no = 1
|
95
|
-
else
|
96
|
-
@current_pos.char_no += 1
|
97
|
-
end
|
98
|
-
@current_pos.index += 1
|
99
|
-
end
|
100
|
-
|
101
|
-
def move_to_the_end_of_the_line
|
102
|
-
char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
|
103
|
-
@current_pos.index += char_count_to_the_end_of_the_line
|
104
|
-
@current_pos.char_no += char_count_to_the_end_of_the_line
|
105
|
-
end
|
106
|
-
|
107
|
-
def move_to(target)
|
108
|
-
char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
|
109
|
-
chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
|
110
|
-
@current_pos.index += char_count_to_target
|
111
|
-
match = /.*\n(.*)$/m.match(chopped_string)
|
112
|
-
p match[1].length if match
|
113
|
-
if match
|
114
|
-
@current_pos.char_no = match[1].length
|
115
|
-
else
|
116
|
-
@current_pos.char_no += char_count_to_target
|
117
|
-
end
|
118
|
-
@current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
|
119
|
-
end
|
120
|
-
|
121
|
-
def <(index)
|
122
|
-
@current_pos.index < index
|
123
|
-
end
|
124
|
-
|
125
|
-
def is_white_space
|
126
|
-
/\s/.match(@str[@current_pos.index])
|
127
|
-
end
|
128
|
-
|
129
|
-
def marked?
|
130
|
-
@marked_pos.index != -1
|
131
|
-
end
|
132
|
-
|
133
|
-
def shift
|
134
|
-
result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
|
135
|
-
mark_clear()
|
136
|
-
return result
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def tokenize()
|
141
|
-
@tokens = []
|
142
|
-
iterator = StringIterator.new(@str)
|
143
|
-
|
144
|
-
while iterator < @str.length do
|
145
|
-
if iterator.is_white_space then
|
146
|
-
@tokens.push iterator.shift if iterator.marked?
|
147
|
-
iterator.move_next
|
148
|
-
elsif @line_comment_marker && iterator.is(@line_comment_marker) then
|
149
|
-
@tokens.push iterator.shift if iterator.marked?
|
150
|
-
iterator.move_to_the_end_of_the_line
|
151
|
-
iterator.move_next
|
152
|
-
elsif @comment_markers && iterator.is(@comment_markers[0]) then
|
153
|
-
@tokens.push iterator.shift if iterator.marked?
|
154
|
-
iterator.move_to(@comment_markers[1])
|
155
|
-
iterator.move_next
|
156
|
-
elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
|
157
|
-
@tokens.push iterator.shift if iterator.marked?
|
158
|
-
iterator.mark_set
|
159
|
-
iterator.move_next
|
160
|
-
iterator.move_to(@string_literal_marker[1])
|
161
|
-
iterator.move_next
|
162
|
-
@tokens.push iterator.shift
|
163
|
-
elsif iterator.is_in(@symbols) then
|
164
|
-
@tokens.push iterator.shift if iterator.marked?
|
165
|
-
iterator.mark_set
|
166
|
-
@symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
|
167
|
-
@tokens.push iterator.shift
|
168
|
-
elsif !iterator.marked? then
|
169
|
-
iterator.mark_set
|
170
|
-
else
|
171
|
-
iterator.move_next
|
172
|
-
end
|
173
|
-
end
|
174
|
-
@tokens.push iterator.shift if iterator.marked?
|
175
|
-
|
176
|
-
return self
|
177
|
-
end
|
178
|
-
|
179
|
-
def is_reserved?(token)
|
180
|
-
@keywords.include?(token) || @symbols.include?(token)
|
181
|
-
end
|
182
|
-
end
|
183
|
-
|
184
|
-
class CSharpLexer < Lexer
|
185
|
-
def initialize
|
186
|
-
super(
|
187
|
-
[ # C# keywords
|
188
|
-
'abstract', 'as', 'base', 'bool', 'break',
|
189
|
-
'byte', 'case', 'catch', 'char', 'checked',
|
190
|
-
'class', 'const', 'continue', 'decimal', 'default',
|
191
|
-
'delegate', 'do', 'double', 'else', 'enum',
|
192
|
-
'event', 'explicit', 'extern', 'false', 'finally',
|
193
|
-
'fixed', 'float', 'for', 'foreach', 'goto',
|
194
|
-
'if', 'implicit', 'in', 'int', 'interface',
|
195
|
-
'internal', 'is', 'lock', 'long', 'namespace',
|
196
|
-
'new', 'null', 'object', 'operator', 'out',
|
197
|
-
'override', 'params', 'private', 'protected', 'public',
|
198
|
-
'readonly', 'ref', 'return', 'sbyte', 'sealed',
|
199
|
-
'short', 'sizeof', 'stackalloc', 'static', 'string',
|
200
|
-
'struct', 'switch', 'this', 'throw', 'true',
|
201
|
-
'try', 'typeof', 'uint', 'ulong', 'unchecked',
|
202
|
-
'unsafe', 'ushort', 'using', 'virtual', 'void',
|
203
|
-
'volatile', 'while',
|
204
|
-
# C# context keywords
|
205
|
-
'add', 'alias', 'ascending', 'async', 'await',
|
206
|
-
'descending', 'dynamic', 'from', 'get', 'global',
|
207
|
-
'group', 'into', 'join', 'let', 'orderby',
|
208
|
-
'partial', 'remove', 'select', 'set', 'value',
|
209
|
-
'var', 'where', 'yield'
|
210
|
-
],
|
211
|
-
[
|
212
|
-
'<<=', '>>=', '<<', '>>', '<=',
|
213
|
-
'>=', '==', '!=', '&&', '||',
|
214
|
-
'??', '+=', '-=', '*=', '/=',
|
215
|
-
'%=', '&=', '|=', '^=', '=>',
|
216
|
-
'*', '/', '%', '+', '-',
|
217
|
-
'<', '>', '&', '^', '|',
|
218
|
-
'?', ':', '=', '{', '}',
|
219
|
-
'(', ')', '[', ']', ';',
|
220
|
-
','
|
221
|
-
],
|
222
|
-
['"', '"'], # comment markers
|
223
|
-
'//', # line comment marker
|
224
|
-
['/*', '*/']) # multi line comment markers
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require "src_lexer/version"
|
3
|
+
|
4
|
+
module SrcLexer
|
5
|
+
class Token
|
6
|
+
attr_reader :str, :line_no, :char_no
|
7
|
+
|
8
|
+
def initialize(str, line_no, char_no)
|
9
|
+
@str = str
|
10
|
+
@line_no = line_no
|
11
|
+
@char_no = char_no
|
12
|
+
end
|
13
|
+
|
14
|
+
def ==(other_object)
|
15
|
+
@str == other_object.str && @line_no == other_object.line_no && @char_no == other_object.char_no
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class Lexer
|
20
|
+
END_TOKEN = [false, nil]
|
21
|
+
NUMBER_REGEX = /^[\d]+[\.]?[\d]*\z/
|
22
|
+
STRING_REGEX = /^\"(.*)\"\z/m
|
23
|
+
attr_reader :keywords, :symbols, :string_literal_marker, :line_comment_marker, :comment_markers, :tokens, :str
|
24
|
+
|
25
|
+
def initialize(keywords, symbols, string_literal_marker, line_comment_marker, comment_markers)
|
26
|
+
@keywords = (keywords ? keywords.uniq.compact : [])
|
27
|
+
@symbols = (symbols ? symbols.uniq.compact : [])
|
28
|
+
@string_literal_marker = string_literal_marker
|
29
|
+
@line_comment_marker = line_comment_marker
|
30
|
+
@comment_markers = comment_markers
|
31
|
+
end
|
32
|
+
|
33
|
+
def analyze(str)
|
34
|
+
@str = str
|
35
|
+
tokenize
|
36
|
+
end
|
37
|
+
|
38
|
+
def pop_token
|
39
|
+
token = @tokens.shift
|
40
|
+
return END_TOKEN if token.nil?
|
41
|
+
case token[0]
|
42
|
+
when NUMBER_REGEX
|
43
|
+
[:NUMBER, Token.new(token[0], token[1], token[2])]
|
44
|
+
when STRING_REGEX
|
45
|
+
[:STRING, Token.new(token[0], token[1], token[2])]
|
46
|
+
else
|
47
|
+
[is_reserved?(token[0]) ? token[0] : :IDENT, Token.new(token[0], token[1], token[2])]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
class PosInfo
|
54
|
+
attr_accessor :index, :line_no, :char_no
|
55
|
+
|
56
|
+
def initialize
|
57
|
+
@index = 0
|
58
|
+
@line_no = 1
|
59
|
+
@char_no = 1
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
class StringIterator
|
64
|
+
def initialize(str)
|
65
|
+
@str = str
|
66
|
+
@current_pos = PosInfo.new
|
67
|
+
@marked_pos = PosInfo.new
|
68
|
+
mark_clear()
|
69
|
+
end
|
70
|
+
|
71
|
+
def mark_clear
|
72
|
+
@marked_pos.index = -1
|
73
|
+
@marked_pos.line_no = 0
|
74
|
+
@marked_pos.char_no = 0
|
75
|
+
end
|
76
|
+
|
77
|
+
def mark_set
|
78
|
+
@marked_pos = @current_pos.clone
|
79
|
+
end
|
80
|
+
|
81
|
+
def is(target_string)
|
82
|
+
return false if target_string.length.zero?
|
83
|
+
end_pos = (@current_pos.index + target_string.length - 1)
|
84
|
+
@str[@current_pos.index..end_pos] == target_string
|
85
|
+
end
|
86
|
+
|
87
|
+
def is_in(target_list)
|
88
|
+
target_list.find { |target| is(target) } != nil
|
89
|
+
end
|
90
|
+
|
91
|
+
def move_next
|
92
|
+
if /\n/.match @str[@current_pos.index]
|
93
|
+
@current_pos.line_no += 1
|
94
|
+
@current_pos.char_no = 1
|
95
|
+
else
|
96
|
+
@current_pos.char_no += 1
|
97
|
+
end
|
98
|
+
@current_pos.index += 1
|
99
|
+
end
|
100
|
+
|
101
|
+
def move_to_the_end_of_the_line
|
102
|
+
char_count_to_the_end_of_the_line = (@str[@current_pos.index..-1] =~ /$/) - 1
|
103
|
+
@current_pos.index += char_count_to_the_end_of_the_line
|
104
|
+
@current_pos.char_no += char_count_to_the_end_of_the_line
|
105
|
+
end
|
106
|
+
|
107
|
+
def move_to(target)
|
108
|
+
char_count_to_target = (@str[@current_pos.index..-1] =~ /#{Regexp.escape(target)}/m) + target.length - 1
|
109
|
+
chopped_string = @str[@current_pos.index..@current_pos.index + char_count_to_target]
|
110
|
+
@current_pos.index += char_count_to_target
|
111
|
+
match = /.*\n(.*)$/m.match(chopped_string)
|
112
|
+
p match[1].length if match
|
113
|
+
if match
|
114
|
+
@current_pos.char_no = match[1].length
|
115
|
+
else
|
116
|
+
@current_pos.char_no += char_count_to_target
|
117
|
+
end
|
118
|
+
@current_pos.line_no += chopped_string.each_char.select{|char| /\n/.match char}.length
|
119
|
+
end
|
120
|
+
|
121
|
+
def <(index)
|
122
|
+
@current_pos.index < index
|
123
|
+
end
|
124
|
+
|
125
|
+
def is_white_space
|
126
|
+
/\s/.match(@str[@current_pos.index])
|
127
|
+
end
|
128
|
+
|
129
|
+
def marked?
|
130
|
+
@marked_pos.index != -1
|
131
|
+
end
|
132
|
+
|
133
|
+
def shift
|
134
|
+
result = [@str[@marked_pos.index..(@current_pos.index - 1)], @marked_pos.line_no, @marked_pos.char_no]
|
135
|
+
mark_clear()
|
136
|
+
return result
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def tokenize()
|
141
|
+
@tokens = []
|
142
|
+
iterator = StringIterator.new(@str)
|
143
|
+
|
144
|
+
while iterator < @str.length do
|
145
|
+
if iterator.is_white_space then
|
146
|
+
@tokens.push iterator.shift if iterator.marked?
|
147
|
+
iterator.move_next
|
148
|
+
elsif @line_comment_marker && iterator.is(@line_comment_marker) then
|
149
|
+
@tokens.push iterator.shift if iterator.marked?
|
150
|
+
iterator.move_to_the_end_of_the_line
|
151
|
+
iterator.move_next
|
152
|
+
elsif @comment_markers && iterator.is(@comment_markers[0]) then
|
153
|
+
@tokens.push iterator.shift if iterator.marked?
|
154
|
+
iterator.move_to(@comment_markers[1])
|
155
|
+
iterator.move_next
|
156
|
+
elsif @string_literal_marker && iterator.is(@string_literal_marker[0]) then
|
157
|
+
@tokens.push iterator.shift if iterator.marked?
|
158
|
+
iterator.mark_set
|
159
|
+
iterator.move_next
|
160
|
+
iterator.move_to(@string_literal_marker[1])
|
161
|
+
iterator.move_next
|
162
|
+
@tokens.push iterator.shift
|
163
|
+
elsif iterator.is_in(@symbols) then
|
164
|
+
@tokens.push iterator.shift if iterator.marked?
|
165
|
+
iterator.mark_set
|
166
|
+
@symbols.find { |symbol| iterator.is(symbol) }.length.times { iterator.move_next }
|
167
|
+
@tokens.push iterator.shift
|
168
|
+
elsif !iterator.marked? then
|
169
|
+
iterator.mark_set
|
170
|
+
else
|
171
|
+
iterator.move_next
|
172
|
+
end
|
173
|
+
end
|
174
|
+
@tokens.push iterator.shift if iterator.marked?
|
175
|
+
|
176
|
+
return self
|
177
|
+
end
|
178
|
+
|
179
|
+
def is_reserved?(token)
|
180
|
+
@keywords.include?(token) || @symbols.include?(token)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
class CSharpLexer < Lexer
|
185
|
+
def initialize
|
186
|
+
super(
|
187
|
+
[ # C# keywords
|
188
|
+
'abstract', 'as', 'base', 'bool', 'break',
|
189
|
+
'byte', 'case', 'catch', 'char', 'checked',
|
190
|
+
'class', 'const', 'continue', 'decimal', 'default',
|
191
|
+
'delegate', 'do', 'double', 'else', 'enum',
|
192
|
+
'event', 'explicit', 'extern', 'false', 'finally',
|
193
|
+
'fixed', 'float', 'for', 'foreach', 'goto',
|
194
|
+
'if', 'implicit', 'in', 'int', 'interface',
|
195
|
+
'internal', 'is', 'lock', 'long', 'namespace',
|
196
|
+
'new', 'null', 'object', 'operator', 'out',
|
197
|
+
'override', 'params', 'private', 'protected', 'public',
|
198
|
+
'readonly', 'ref', 'return', 'sbyte', 'sealed',
|
199
|
+
'short', 'sizeof', 'stackalloc', 'static', 'string',
|
200
|
+
'struct', 'switch', 'this', 'throw', 'true',
|
201
|
+
'try', 'typeof', 'uint', 'ulong', 'unchecked',
|
202
|
+
'unsafe', 'ushort', 'using', 'virtual', 'void',
|
203
|
+
'volatile', 'while',
|
204
|
+
# C# context keywords
|
205
|
+
'add', 'alias', 'ascending', 'async', 'await',
|
206
|
+
'descending', 'dynamic', 'from', 'get', 'global',
|
207
|
+
'group', 'into', 'join', 'let', 'orderby',
|
208
|
+
'partial', 'remove', 'select', 'set', 'value',
|
209
|
+
'var', 'where', 'yield'
|
210
|
+
],
|
211
|
+
[
|
212
|
+
'<<=', '>>=', '<<', '>>', '<=',
|
213
|
+
'>=', '==', '!=', '&&', '||',
|
214
|
+
'??', '+=', '-=', '*=', '/=',
|
215
|
+
'%=', '&=', '|=', '^=', '=>',
|
216
|
+
'*', '/', '%', '+', '-',
|
217
|
+
'<', '>', '&', '^', '|',
|
218
|
+
'?', ':', '=', '{', '}',
|
219
|
+
'(', ')', '[', ']', ';',
|
220
|
+
','
|
221
|
+
],
|
222
|
+
['"', '"'], # comment markers
|
223
|
+
'//', # line comment marker
|
224
|
+
['/*', '*/']) # multi line comment markers
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
3
|
-
require 'src_lexer'
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
3
|
+
require 'src_lexer'
|
data/spec/src_lexer_spec.rb
CHANGED
@@ -1,105 +1,115 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
require_relative './spec_helper'
|
3
|
-
|
4
|
-
describe SrcLexer do
|
5
|
-
it 'should have a version number' do
|
6
|
-
SrcLexer::VERSION.
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
describe SrcLexer::Lexer, 'with empty string' do
|
11
|
-
it 'should return Lexer::END_TOKEN' do
|
12
|
-
sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
|
13
|
-
sut.analyze('')
|
14
|
-
sut.pop_token.
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
describe SrcLexer::Lexer, 'with keyword definitions' do
|
19
|
-
it 'should recognize keywords' do
|
20
|
-
sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil, nil)
|
21
|
-
sut.analyze('struct structenum enum')
|
22
|
-
sut.pop_token.
|
23
|
-
sut.pop_token.
|
24
|
-
sut.pop_token.
|
25
|
-
sut.pop_token.
|
26
|
-
end
|
27
|
-
it 'should reduce keyword duplication' do
|
28
|
-
sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil, nil)
|
29
|
-
sut.keywords.
|
30
|
-
end
|
31
|
-
it 'should ignore nil keyword' do
|
32
|
-
sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil, nil)
|
33
|
-
sut.keywords.
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe SrcLexer::Lexer, 'with symbol definitions' do
|
38
|
-
it 'should recognize symbols' do
|
39
|
-
sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil, nil)
|
40
|
-
sut.analyze('.. A ,')
|
41
|
-
sut.pop_token.
|
42
|
-
sut.pop_token.
|
43
|
-
sut.pop_token.
|
44
|
-
sut.pop_token.
|
45
|
-
end
|
46
|
-
it 'should recognize symbols(,) if continues like "A,B"' do
|
47
|
-
sut = SrcLexer::Lexer.new(['A', 'B'], [','], nil, nil, nil)
|
48
|
-
sut.analyze('A,B')
|
49
|
-
sut.pop_token.
|
50
|
-
sut.pop_token.
|
51
|
-
sut.pop_token.
|
52
|
-
sut.pop_token.
|
53
|
-
end
|
54
|
-
it 'should
|
55
|
-
sut = SrcLexer::Lexer.new(nil, ['
|
56
|
-
sut.
|
57
|
-
|
58
|
-
|
59
|
-
sut
|
60
|
-
sut.
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
sut
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
it 'should recognize
|
76
|
-
sut = SrcLexer::Lexer.new(nil, nil, nil, '//',
|
77
|
-
sut.analyze(<<-'EOS')
|
78
|
-
A
|
79
|
-
B
|
80
|
-
EOS
|
81
|
-
sut.pop_token.
|
82
|
-
sut.pop_token.
|
83
|
-
sut.pop_token.
|
84
|
-
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
sut.pop_token.
|
92
|
-
sut.pop_token.
|
93
|
-
sut.pop_token.
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
sut
|
100
|
-
sut.
|
101
|
-
sut.pop_token.
|
102
|
-
sut.pop_token.
|
103
|
-
sut.pop_token.
|
104
|
-
end
|
105
|
-
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require_relative './spec_helper'
|
3
|
+
|
4
|
+
describe SrcLexer do
|
5
|
+
it 'should have a version number' do
|
6
|
+
expect(SrcLexer::VERSION).not_to be_nil # use the matcher directly; eq(be_nil) compared against the matcher object and could never fail
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
describe SrcLexer::Lexer, 'with empty string' do
|
11
|
+
it 'should return Lexer::END_TOKEN' do
|
12
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
|
13
|
+
sut.analyze('')
|
14
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe SrcLexer::Lexer, 'with keyword definitions' do
|
19
|
+
it 'should recognize keywords' do
|
20
|
+
sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil, nil)
|
21
|
+
sut.analyze('struct structenum enum')
|
22
|
+
expect(sut.pop_token).to eq(['struct', SrcLexer::Token.new('struct', 1, 1)])
|
23
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('structenum', 1, 8)])
|
24
|
+
expect(sut.pop_token).to eq(['enum', SrcLexer::Token.new('enum', 1, 19)])
|
25
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
26
|
+
end
|
27
|
+
it 'should reduce keyword duplication' do
|
28
|
+
sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil, nil)
|
29
|
+
expect(sut.keywords).to eq(['struct'])
|
30
|
+
end
|
31
|
+
it 'should ignore nil keyword' do
|
32
|
+
sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil, nil)
|
33
|
+
expect(sut.keywords).to eq(['struct', 'enum'])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe SrcLexer::Lexer, 'with symbol definitions' do
|
38
|
+
it 'should recognize symbols' do
|
39
|
+
sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil, nil)
|
40
|
+
sut.analyze('.. A ,')
|
41
|
+
expect(sut.pop_token).to eq(['..', SrcLexer::Token.new('..', 1, 1)])
|
42
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('A', 1, 4)])
|
43
|
+
expect(sut.pop_token).to eq([',', SrcLexer::Token.new(',', 1, 6)])
|
44
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
45
|
+
end
|
46
|
+
it 'should recognize symbols(,) if continues like "A,B"' do
|
47
|
+
sut = SrcLexer::Lexer.new(['A', 'B'], [','], nil, nil, nil)
|
48
|
+
sut.analyze('A,B')
|
49
|
+
expect(sut.pop_token).to eq(['A', SrcLexer::Token.new('A', 1, 1)])
|
50
|
+
expect(sut.pop_token).to eq([',', SrcLexer::Token.new(',', 1, 2)])
|
51
|
+
expect(sut.pop_token).to eq(['B', SrcLexer::Token.new('B', 1, 3)])
|
52
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
53
|
+
end
|
54
|
+
it 'should recognize symbol(==) if symbol(=) defined' do
|
55
|
+
sut = SrcLexer::Lexer.new(nil, ['==', '='], nil, nil, nil)
|
56
|
+
sut.analyze('A = B == C')
|
57
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('A', 1, 1)])
|
58
|
+
expect(sut.pop_token).to eq(['=', SrcLexer::Token.new('=', 1, 3)])
|
59
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('B', 1, 5)])
|
60
|
+
expect(sut.pop_token).to eq(['==', SrcLexer::Token.new('==', 1, 7)])
|
61
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('C', 1, 10)])
|
62
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
63
|
+
end
|
64
|
+
it 'should reduce symbol duplication' do
|
65
|
+
sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil, nil)
|
66
|
+
expect(sut.symbols).to eq([','])
|
67
|
+
end
|
68
|
+
it 'should ignore nil symbol' do
|
69
|
+
sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil, nil)
|
70
|
+
expect(sut.symbols).to eq(['{', '}'])
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe SrcLexer::Lexer, 'with line comment marker' do
|
75
|
+
it 'should recognize line comment' do
|
76
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, '//', nil)
|
77
|
+
sut.analyze(<<-'EOS')
|
78
|
+
A//comment
|
79
|
+
B
|
80
|
+
EOS
|
81
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('A', 1, 7)])
|
82
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('B', 2, 7)])
|
83
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
84
|
+
end
|
85
|
+
it 'should recognize multi line comment' do
|
86
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, '//', ['/*', '*/'])
|
87
|
+
sut.analyze(<<-'EOS')
|
88
|
+
A/*comment
|
89
|
+
B//still in comment*/C
|
90
|
+
EOS
|
91
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('A', 1, 7)])
|
92
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('C', 2, 28)])
|
93
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe SrcLexer::Lexer do
|
98
|
+
it 'should analyze number string' do
|
99
|
+
sut = SrcLexer::Lexer.new(nil, nil, nil, nil, nil)
|
100
|
+
sut.analyze('9 1.5')
|
101
|
+
expect(sut.pop_token).to eq([:NUMBER, SrcLexer::Token.new("9", 1, 1)]) # first number token: text "9" at line 1, column 1
|
102
|
+
expect(sut.pop_token).to eq([:NUMBER, SrcLexer::Token.new("1.5", 1, 3)])
|
103
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
104
|
+
end
|
105
|
+
it 'should analyze string literal' do
|
106
|
+
sut = SrcLexer::Lexer.new(nil, nil, ['"', '"'], '//', ['/*', '*/'])
|
107
|
+
sut.analyze('A"//"B"/**/"C')
|
108
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('A', 1, 1)])
|
109
|
+
expect(sut.pop_token).to eq([:STRING, SrcLexer::Token.new('"//"', 1, 2)])
|
110
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('B', 1, 6)])
|
111
|
+
expect(sut.pop_token).to eq([:STRING, SrcLexer::Token.new('"/**/"', 1, 7)])
|
112
|
+
expect(sut.pop_token).to eq([:IDENT, SrcLexer::Token.new('C', 1, 13)])
|
113
|
+
expect(sut.pop_token).to eq(SrcLexer::Lexer::END_TOKEN)
|
114
|
+
end
|
115
|
+
end
|
data/src_lexer.gemspec
CHANGED
@@ -1,24 +1,24 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'src_lexer/version'
|
5
|
-
|
6
|
-
Gem::Specification.new do |spec|
|
7
|
-
spec.name = "src_lexer"
|
8
|
-
spec.version = SrcLexer::VERSION
|
9
|
-
spec.authors = ["kkikzk"]
|
10
|
-
spec.email = ["kkikzk@gmail.com"]
|
11
|
-
spec.summary = %q{A simple source file lexer}
|
12
|
-
spec.description = ""
|
13
|
-
spec.homepage = "https://github.com/kkikzk/src_lexer"
|
14
|
-
spec.license = "MIT"
|
15
|
-
|
16
|
-
spec.files = `git ls-files -z`.split("\x0")
|
17
|
-
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
-
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
-
spec.require_paths = ["lib"]
|
20
|
-
|
21
|
-
spec.add_development_dependency "bundler", "~> 1.5"
|
22
|
-
spec.add_development_dependency "rake"
|
23
|
-
spec.add_development_dependency "rspec"
|
24
|
-
end
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'src_lexer/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "src_lexer"
|
8
|
+
spec.version = SrcLexer::VERSION
|
9
|
+
spec.authors = ["kkikzk"]
|
10
|
+
spec.email = ["kkikzk@gmail.com"]
|
11
|
+
spec.summary = %q{A simple source file lexer}
|
12
|
+
spec.description = ""
|
13
|
+
spec.homepage = "https://github.com/kkikzk/src_lexer"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.5"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "rspec"
|
24
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: src_lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kkikzk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04
|
11
|
+
date: 2014-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|