src_lexer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YWNmNzQ2N2JjN2UyNTVhM2UwZTE3ZmVkMWVlZjE4OTJiZGRkNGUwNg==
5
+ data.tar.gz: !binary |-
6
+ ZWFmYTU0NmVjYTkxZWI5NjNkNTQ3Zjc4ZjNlMTkzNjQwMTFkOWI2Mg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MGY0M2JlNWZkNjY3ZTEzNjk5M2E4OTZlMjBhYmU4Mzg5OGI3ZGZjNWQyNTZm
10
+ NzliYjU2OGFmNDk2OWJkZDY5ZGJlMmYzMTViMjhkYmMwMDk2NmUyYzBhYjdl
11
+ ZDFmNmExOGZkNTA2YTY2NDVjOGI3YzVkZjlhYzI4ZDU1YzViMzI=
12
+ data.tar.gz: !binary |-
13
+ ZmE5YWQ3YTViY2FmZWE0YTRlMWQxODhlZThkMDA0ZWExMDNiODQwNGI0ZTQ1
14
+ ZDRkYzAyZTczZDhiNGJiZjVjYWNhMDI5M2Y5NDBlNmI3ZjBkZTUxODlkMzM4
15
+ MDNiNGI3ZjBiZjFlM2ZmMmY4MWYxZTVjM2ZhMjEzNDIwNGM1ZGM=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in src_lexer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 kkikzk
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # SrcLexer
2
+
3
+ A simple source file lexer.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'src_lexer'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install src_lexer
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( http://github.com/<my-github-username>/src_lexer/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/src_lexer ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'src_lexer'
@@ -0,0 +1,3 @@
1
module SrcLexer
  # Version string of the gem. Frozen so the constant cannot be mutated
  # in place by code that receives it.
  VERSION = "0.0.1".freeze
end
data/lib/src_lexer.rb ADDED
@@ -0,0 +1,170 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require "src_lexer/version"
3
+
4
+ module SrcLexer
5
# A piece of token text together with the line and character numbers that
# were supplied when it was created.
class Token
  attr_reader :str, :line_no, :char_no

  # str     - the token text
  # line_no - line number associated with the token
  # char_no - character (column) number associated with the token
  def initialize(str, line_no, char_no)
    @str = str
    @line_no = line_no
    @char_no = char_no
  end

  # Two tokens are equal when their text, line number and character number
  # all match. Any object exposing #str, #line_no and #char_no can be
  # compared; anything else (including nil) is simply not equal instead of
  # raising NoMethodError.
  def ==(other_object)
    comparable = [:str, :line_no, :char_no].all? { |name| other_object.respond_to?(name) }
    return false unless comparable

    @str == other_object.str &&
      @line_no == other_object.line_no &&
      @char_no == other_object.char_no
  end
end
18
+
19
# Splits a source string into tokens using caller-supplied keywords,
# symbols and comment markers. Call #analyze with the text, then call
# #pop_token repeatedly until it returns END_TOKEN.
class Lexer
  # Returned by #pop_token when no tokens are left.
  END_TOKEN = [false, nil].freeze

  attr_reader :keywords, :symbols, :line_comment_marker, :comment_markers, :tokens, :str

  # keywords            - list of reserved words (nil entries and duplicates are dropped)
  # symbols             - list of symbol strings (nil entries and duplicates are dropped)
  # line_comment_marker - string that starts a comment running to the end of the line
  # comment_marker      - two-element list: [opening marker, closing marker]
  #
  # A nil argument disables the corresponding feature.
  def initialize(keywords, symbols, line_comment_marker, comment_marker)
    @keywords = (keywords || []).uniq.compact
    @symbols = (symbols || []).uniq.compact
    @line_comment_marker = line_comment_marker || ''
    @comment_markers = comment_marker || ['', '']
    # Start with an empty queue so #pop_token is safe before #analyze.
    @tokens = []
  end

  # Tokenizes +str+ (nil is treated as an empty string) and stores the
  # result for #pop_token. Returns the raw token list, each entry being
  # [text, line_no, char_no].
  def analyze(str)
    @str = str.nil? ? '' : str
    tokenize
  end

  # Removes the next token from the queue and returns [type, Token], or
  # END_TOKEN when the queue is empty. +type+ is :NUMBER for digits with an
  # optional fractional part, :STRING for text enclosed in double quotes,
  # the token text itself for keywords and symbols, and :IDENT otherwise.
  def pop_token
    entry = @tokens.shift
    return END_TOKEN if entry.nil?

    text, line_no, char_no = entry
    type =
      case text
      when /\A\d+\.?\d*\z/ then :NUMBER
      when /\A".*"\z/m then :STRING
      else is_reserved?(text) ? text : :IDENT
      end
    [type, Token.new(text, line_no, char_no)]
  end

  # Cursor over the source string. It tracks the current index and an
  # optional "mark" that remembers where the token being collected began.
  class StringIterator
    attr_reader :index

    def initialize(str)
      @str = str
      @index = 0
      @marked_pos = -1 # -1 means "no token in progress"
    end

    # Remembers the current index as the start of a token.
    def mark_set
      @marked_pos = @index
    end

    # True when the text at the current index starts with +target_string+.
    # An empty target never matches.
    def is(target_string)
      return false if target_string.length.zero?

      @str[@index, target_string.length] == target_string
    end

    # True when any entry of +target_list+ matches at the current index.
    def is_in(target_list)
      target_list.any? { |target| is(target) }
    end

    def move_next
      @index += 1
    end

    # Moves to the last character before the next newline (or to the last
    # character of the text), so a following #move_next lands on the
    # newline itself.
    def move_to_the_end_of_the_line
      line_end = @str.index("\n", @index) || @str.length
      @index = line_end - 1
    end

    # Moves to the last character of the next occurrence of +target+,
    # searching from +offset+ characters past the current index. When
    # +target+ does not occur, moves to the last character of the text so
    # the caller consumes the remainder instead of crashing.
    def move_to(target, offset = 0)
      found = @str.index(target, @index + offset)
      @index = found ? found + target.length - 1 : @str.length - 1
    end

    def [](range)
      @str[range]
    end

    def <(pos)
      @index < pos
    end

    # Character at the current index.
    def char
      @str[@index]
    end

    def is_white_space
      /[\s]/.match(char)
    end

    # Returns [line_no, char_no] for the character at +pos+. Lines are
    # counted from 1; char_no is the number of characters since the last
    # newline, up to and including +pos+.
    def info(pos)
      prefix = @str[0..pos]
      last_newline = prefix.rindex("\n")
      line_no = 1 + prefix.count("\n")
      char_no = last_newline ? prefix.length - last_newline - 1 : prefix.length
      [line_no, char_no]
    end

    def marked?
      @marked_pos != -1
    end

    # Returns [text, line_no, char_no] for the span from the mark up to
    # (not including) the current index, and clears the mark.
    def shift
      result = @str[@marked_pos..(@index - 1)]
      line_no_and_char_no = info(@marked_pos)
      @marked_pos = -1
      return result, *line_no_and_char_no
    end
  end

  private

  # Scans @str once and fills @tokens with [text, line_no, char_no]
  # entries. Returns @tokens.
  def tokenize
    @tokens = []
    iterator = StringIterator.new(@str)
    # Try longer symbols first so that a symbol is not hidden by a shorter
    # symbol that is its prefix (e.g. '.' listed before '..').
    candidates = longest_first(@symbols)

    while iterator < @str.length
      if iterator.is_white_space
        flush_pending(iterator)
      elsif iterator.is(@line_comment_marker)
        flush_pending(iterator)
        iterator.move_to_the_end_of_the_line
      elsif iterator.is(@comment_markers[0])
        flush_pending(iterator)
        # Search past the opening marker so it cannot overlap the closer.
        iterator.move_to(@comment_markers[1], @comment_markers[0].length)
      elsif iterator.is('"')
        flush_pending(iterator)
        iterator.mark_set
        iterator.move_next
        iterator.move_to('"')
        iterator.move_next
        @tokens.push iterator.shift
        next # the cursor is already past the closing quote
      elsif (symbol = candidates.find { |candidate| iterator.is(candidate) })
        flush_pending(iterator)
        @tokens.push [symbol.dup, *iterator.info(iterator.index)]
        (symbol.length - 1).times { iterator.move_next }
      elsif !iterator.marked?
        iterator.mark_set
      end
      iterator.move_next
    end

    flush_pending(iterator)
    @tokens
  end

  # Pushes the token currently being collected, if there is one.
  def flush_pending(iterator)
    @tokens.push iterator.shift if iterator.marked?
  end

  # Returns +list+ ordered by descending length; entries of equal length
  # keep their original relative order.
  def longest_first(list)
    list.each_with_index.sort_by { |item, position| [-item.length, position] }.map(&:first)
  end

  def is_reserved?(token)
    @keywords.include?(token) || @symbols.include?(token)
  end
end
170
+ end
@@ -0,0 +1,3 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
3
+ require 'src_lexer'
@@ -0,0 +1,105 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require_relative './spec_helper'
3
+
4
+ describe SrcLexer do
5
+ it 'should have a version number' do
6
+ SrcLexer::VERSION.should_not be_nil
7
+ end
8
+ end
9
+
10
+ describe SrcLexer::Lexer, 'with empty string' do
11
+ it 'should return Lexer::END_TOKEN' do
12
+ sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
13
+ sut.analyze('')
14
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
15
+ end
16
+ end
17
+
18
+ describe SrcLexer::Lexer, 'with keyword definitions' do
19
+ it 'should recognize keywords' do
20
+ sut = SrcLexer::Lexer.new(['struct', 'enum'], nil, nil, nil)
21
+ sut.analyze('struct structenum enum')
22
+ sut.pop_token.should == ['struct', SrcLexer::Token.new('struct', 1, 1)]
23
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('structenum', 1, 8)]
24
+ sut.pop_token.should == ['enum', SrcLexer::Token.new('enum', 1, 19)]
25
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
26
+ end
27
+ it 'should reduce keyword duplication' do
28
+ sut = SrcLexer::Lexer.new(['struct', 'struct'], nil, nil, nil)
29
+ sut.keywords.should == ['struct']
30
+ end
31
+ it 'should ignore nil keyword' do
32
+ sut = SrcLexer::Lexer.new(['struct', nil, 'enum'], nil, nil, nil)
33
+ sut.keywords.should == ['struct', 'enum']
34
+ end
35
+ end
36
+
37
+ describe SrcLexer::Lexer, 'with symbol definitions' do
38
+ it 'should recognize symbols' do
39
+ sut = SrcLexer::Lexer.new(nil, ['..', ','], nil, nil)
40
+ sut.analyze('.. A ,')
41
+ sut.pop_token.should == ['..', SrcLexer::Token.new('..', 1, 1)]
42
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 4)]
43
+ sut.pop_token.should == [',', SrcLexer::Token.new(',', 1, 6)]
44
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
45
+ end
46
+ it 'should recognize symbols(,) if continues like "A,B"' do
47
+ sut = SrcLexer::Lexer.new(nil, [','], nil, nil)
48
+ sut.analyze('A,B')
49
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
50
+ sut.pop_token.should == [',', SrcLexer::Token.new(',', 1, 2)]
51
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('B', 1, 3)]
52
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
53
+ end
54
+ it 'should reduce symbol duplication' do
55
+ sut = SrcLexer::Lexer.new(nil, [',', ','], nil, nil)
56
+ sut.symbols.should == [',']
57
+ end
58
+ it 'should ignore nil keyword' do
59
+ sut = SrcLexer::Lexer.new(nil, ['{', nil, '}'], nil, nil)
60
+ sut.symbols.should == ['{', '}']
61
+ end
62
+ end
63
+
64
+ describe SrcLexer::Lexer, 'with line comment marker' do
65
+ it 'should recognize line comment' do
66
+ sut = SrcLexer::Lexer.new(nil, nil, '//', nil)
67
+ sut.analyze(<<-'EOS')
68
+ A//comment
69
+ B
70
+ EOS
71
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 7)]
72
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('B', 2, 7)]
73
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
74
+ end
75
+ it 'should recognize multi line comment' do
76
+ sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
77
+ sut.analyze(<<-'EOS')
78
+ A/*comment
79
+ B//still in comment*/C
80
+ EOS
81
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 7)]
82
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('C', 2, 28)]
83
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
84
+ end
85
+ end
86
+
87
+ describe SrcLexer::Lexer do
88
+ it 'should analyze number string' do
89
+ sut = SrcLexer::Lexer.new(nil, nil, nil, nil)
90
+ sut.analyze('9 1.5')
91
+ sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("9", 1, 1,)]
92
+ sut.pop_token.should == [:NUMBER, SrcLexer::Token.new("1.5", 1, 3)]
93
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
94
+ end
95
+ it 'should analyze string literal' do
96
+ sut = SrcLexer::Lexer.new(nil, nil, '//', ['/*', '*/'])
97
+ sut.analyze('A"//"B"/**/"C')
98
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('A', 1, 1)]
99
+ sut.pop_token.should == [:STRING, SrcLexer::Token.new('"//"', 1, 2)]
100
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('B', 1, 6)]
101
+ sut.pop_token.should == [:STRING, SrcLexer::Token.new('"/**/"', 1, 7)]
102
+ sut.pop_token.should == [:IDENT, SrcLexer::Token.new('C', 1, 13)]
103
+ sut.pop_token.should == SrcLexer::Lexer::END_TOKEN
104
+ end
105
+ end
data/src_lexer.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'src_lexer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "src_lexer"
8
+ spec.version = SrcLexer::VERSION
9
+ spec.authors = ["kkikzk"]
10
+ spec.email = ["kkikzk@gmail.com"]
11
+ spec.summary = %q{A simple source file lexer}
12
+ spec.description = ""
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.5"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
24
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: src_lexer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - kkikzk
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: ''
56
+ email:
57
+ - kkikzk@gmail.com
58
+ executables:
59
+ - src_lexer
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - .gitignore
64
+ - .rspec
65
+ - .travis.yml
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - bin/src_lexer
71
+ - lib/src_lexer.rb
72
+ - lib/src_lexer/version.rb
73
+ - spec/spec_helper.rb
74
+ - spec/src_lexer_spec.rb
75
+ - src_lexer.gemspec
76
+ homepage: ''
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: A simple source file lexer
100
+ test_files:
101
+ - spec/spec_helper.rb
102
+ - spec/src_lexer_spec.rb