rlex 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rlex.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Rasmus Borgsmidt
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Rlex
2
+
3
+ Provides a simple lexer based on Ruby's `StringScanner` class.
4
+
5
+ The lexer was written for use with Racc, a Ruby variant of
6
+ Yacc. But there is no code dependency on that project so the
7
+ lexer may also be used on its own or with other packages.
8
+
9
+ * Ignored input takes precedence over rules and keywords, so if
10
+ a prefix is matched by an ignore pattern, it's ignored even if
11
+ it's also a keyword or matched by a rule
12
+ * The lexer is greedy, so if a prefix is matched by multiple
13
+ rules or keywords, the lexer chooses the option consuming the
14
+ most input
15
+
16
+ ## Documentation
17
+
18
+ Find out more about this project on [GitHub][gh_rlex],
19
+ [RubyGems][rg_rlex] and [RubyDoc][rd_rlex].
20
+
21
+ [gh_rlex]: https://github.com/borgsmidt/rlex
22
+ [rg_rlex]: http://rubygems.org/gems/rlex
23
+ [rd_rlex]: http://rubydoc.info/gems/rlex
24
+
25
+ ## Installation
26
+
27
+ Install the gem using:
28
+
29
+ $ gem install rlex
30
+
31
+ Alternatively, add this line to your application's Gemfile:
32
+
33
+ gem 'rlex'
34
+
35
+ Then execute:
36
+
37
+ $ bundle
38
+
39
+ ## Usage
40
+
41
+ ```ruby
42
+ # Define behavior (after `require 'rlex/lexer'`)
43
+ lexer = Rlex::Lexer.new
44
+ lexer.ignore /\s+/ # ignore whitespace
45
+ lexer.rule :word, /\w+/ # consider any text a 'word'
46
+ lexer.keyword :if # treat 'if' as a special keyword
47
+ lexer.keyword :lparen, "(" # any fixed input such as parentheses
48
+ lexer.keyword :rparen, ")" # may be defined as keywords
49
+
50
+ # Initialize with input
51
+ lexer.start "if ( foo ) bar" # initialize the lexer with a string
52
+
53
+ # Iterate through tokens
54
+ lexer.next_token # => Token (type = :if, value = 'if' )
55
+ lexer.next_token # => Token (type = :lparen, value = '(' )
56
+ lexer.next_token # => Token (type = :word, value = 'foo')
57
+ lexer.next_token # => Token (type = :rparen, value = ')' )
58
+ lexer.next_token # => Token (type = :word, value = 'bar')
59
+ lexer.next_token # => EOF_TOKEN
60
+ ```
61
+
62
+ ## Contributing
63
+
64
+ 1. Fork it on [GitHub][gh_rlex]
65
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
66
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
67
+ 4. Push to the branch (`git push origin my-new-feature`)
68
+ 5. Create a new pull request
69
+
70
+ ## License
71
+
72
+ Written by Rasmus Borgsmidt &lt;<rasmus@borgsmidt.dk>&gt;
73
+
74
+ Released under the MIT license: <https://opensource.org/licenses/MIT>
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/lib/rlex.rb ADDED
@@ -0,0 +1,8 @@
1
require "rlex/version"
require "rlex/token"
require "rlex/lexer"

# Implements a simple lexer using a StringScanner.
#
# This file is the entry point of the gem: requiring it loads the
# version constant, the Token struct and the Lexer class, so that
# <tt>require 'rlex'</tt> alone is enough to use <tt>Rlex::Lexer</tt>.
#
# @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
#
module Rlex
end
data/lib/rlex/lexer.rb ADDED
@@ -0,0 +1,172 @@
1
+ require 'strscan'
2
+ require 'rlex/token'
3
+
4
+ module Rlex
5
+ # Implements a simple lexer using a <tt>StringScanner</tt>.
6
+ #
7
+ # The lexer was written for use with Racc, a Ruby variant of
8
+ # Yacc. But there is no code dependency on that project so the
9
+ # lexer may also be used on its own or with other packages.
10
+ #
11
+ # * Ignored input takes precedence over rules and keywords, so if
12
+ # a prefix is matched by an ignore pattern, it's ignored even if
13
+ # it's also a keyword or matched by a rule
14
+ # * The lexer is greedy, so if a prefix is matched by multiple
15
+ # rules or keywords, the lexer chooses the option consuming the
16
+ # most input
17
+ #
18
+ # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
19
+ #
20
+ # @example Basic usage
21
+ # # Define behavior
22
+ # lexer = Lexer.new
23
+ # lexer.ignore /\s+/ # ignore whitespace
24
+ # lexer.rule :word, /\w+/ # consider any text a 'word'
25
+ # lexer.keyword :if # treat 'if' as a special keyword
26
+ # lexer.keyword :lparen, "(" # any fixed input such as parentheses
27
+ # lexer.keyword :rparen, ")" # may be defined as keywords
28
+ #
29
+ # # Initialize with input
30
+ # lexer.start "if ( foo ) bar" # initialize the lexer with a string
31
+ #
32
+ # # Iterate through tokens
33
+ # lexer.next_token # => Token (type = :if, value = 'if')
34
+ # lexer.next_token # => Token (type = :lparen, value = '(')
35
+ # lexer.next_token # => Token (type = :word, value = 'foo')
36
+ # lexer.next_token # => Token (type = :rparen, value = ')')
37
+ # lexer.next_token # => Token (type = :word, value = 'bar')
38
+ # lexer.next_token # => EOF_TOKEN
39
+ #
40
+ class Lexer
41
+ # Initializes an empty Lexer.
42
+ #
43
+ def initialize
44
+ @ignored = []
45
+ @rules = []
46
+ @keywords = {}
47
+ end
48
+
49
+ # Instructs the lexer to ignore input matched by the specified
50
+ # pattern. If appropriate, call this multiple times to ignore
51
+ # several patterns.
52
+ #
53
+ # @note Ignored input takes precedence over rules and keywords,
54
+ # so if a prefix is matched by an ignore pattern, it's ignored
55
+ # even if it's also a keyword or matched by a rule
56
+ #
57
+ # @param [Regexp] pattern Pattern of input to ignore
58
+ #
59
+ # @return [Regexp] The specified pattern
60
+ #
61
+ def ignore(pattern)
62
+ @ignored << pattern
63
+ return pattern
64
+ end
65
+
66
+ # Defines a rule to match the specified pattern.
67
+ #
68
+ # @note Use keywords for efficiency instead of rules whenever
69
+ # the matched input is static
70
+ #
71
+ # @param [Symbol, #to_sym] name Unique name of rule
72
+ # @param [Regexp] pattern Pattern of input to match
73
+ #
74
+ # @raise [ArgumentError] If the specified name is already
75
+ # used by other rules or keywords
76
+ #
77
+ # @return [Symbol] The name of the rule
78
+ #
79
+ def rule(name, pattern)
80
+ # @todo Validate the rule name
81
+ @rules << (Rule.new name.to_sym, pattern)
82
+ return name.to_sym
83
+ end
84
+
85
+ # Defines a static sequence of input as a keyword.
86
+ #
87
+ # @note Use keywords for efficiency instead of rules whenever
88
+ # the matched input is static
89
+ #
90
+ # @param [optional, Symbol, #to_sym] name Unique name of the
91
+ # keyword. If this argument is not given, the keyword is used
92
+ # to name itself
93
+ # @param [String, #to_s] kword Sequence of input to match as a
94
+ # keyword
95
+ #
96
+ # @raise [ArgumentError] If the specified name is already
97
+ # used by other rules or keywords
98
+ #
99
+ # @return [Symbol] The name of the keyword
100
+ #
101
+ def keyword(name = nil, kword)
102
+ # @todo Validate the keyword name
103
+ name = kword if name == nil
104
+ pattern = Regexp.new(Regexp.escape kword.to_s)
105
+ rule name, pattern
106
+ @keywords[kword.to_s] = Token.new name.to_sym, kword.to_s
107
+ return name.to_sym
108
+ end
109
+
110
+ # Initializes the lexer with new input.
111
+ #
112
+ # @note This resets the lexer with a new StringScanner so any
113
+ # state information related to previous input is lost
114
+ #
115
+ # @param [String] input Input to scan for tokens
116
+ #
117
+ # @return [String] The specified input
118
+ #
119
+ def start(input)
120
+ @scanner = StringScanner.new input
121
+ return input
122
+ end
123
+
124
+ # Returns the next token matched from the remaining input. If no
125
+ # input is left, or the lexer has not been initialized,
126
+ # <tt>EOF_TOKEN</tt> is returned.
127
+ #
128
+ # @raise [RuntimeError] If there is any unmatched input
129
+ #
130
+ # @return [Token] Next token or <tt>EOF_TOKEN</tt>
131
+ #
132
+ def next_token
133
+ return EOF_TOKEN if @scanner.nil? or @scanner.empty?
134
+ return next_token if ignore_prefix?
135
+ rule = greediest_rule
136
+ if rule
137
+ prefix = @scanner.scan(rule.pattern)
138
+ keyword = @keywords[prefix]
139
+ return keyword ? keyword : Token.new(rule.name, prefix)
140
+ end
141
+ raise "unexpected input <#{@scanner.peek(5)}>"
142
+ end
143
+
144
+ private
145
+
146
+ # @private
147
+ Rule = Struct.new :name, :pattern
148
+
149
+ # @private
150
+ def ignore_prefix?
151
+ @ignored.each do |pattern|
152
+ prefix = @scanner.scan(pattern)
153
+ return true if prefix
154
+ end
155
+ return false
156
+ end
157
+
158
+ # @private
159
+ def greediest_rule
160
+ r = nil
161
+ len = 0
162
+ @rules.each do |rule|
163
+ prefix = @scanner.check(rule.pattern)
164
+ if prefix and prefix.length > len
165
+ r = rule
166
+ len = prefix.length
167
+ end
168
+ end
169
+ return r
170
+ end
171
+ end
172
+ end
data/lib/rlex/token.rb ADDED
@@ -0,0 +1,16 @@
1
module Rlex
  # Simple class to represent tokens matched from the input.
  #
  # Being a Struct, two tokens are equal when both their type and
  # their value are equal.
  #
  # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
  #
  # @attr_reader [Symbol] type Type of the token, such as the name of
  #   the rule used to match it
  # @attr_reader [String] value Text matched from the input
  #
  Token = Struct.new(:type, :value)

  # Special token used when the lexer has reached the end of the
  # specified input.
  #
  # This single instance is handed to every caller, so it is frozen
  # (together with its value) to keep one caller from altering what
  # all others see.
  #
  EOF_TOKEN = Token.new(:eof, "".freeze).freeze
end
@@ -0,0 +1,4 @@
1
module Rlex
  # Release number of the rlex gem, as a dotted version string.
  VERSION = '0.5.0'
end
data/rlex.gemspec ADDED
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for rlex. The version number is read from
# lib/rlex/version.rb so it is defined in exactly one place.
require File.expand_path('../lib/rlex/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "rlex"
  gem.version       = Rlex::VERSION
  gem.authors       = ["Rasmus Borgsmidt"]
  gem.email         = ["rasmus@borgsmidt.dk"]
  # The summary is the short one-liner; the description carries the
  # longer explanation.
  gem.summary       = "Implements a simple lexer using a StringScanner"
  gem.description   = "Implements a simple lexer using a StringScanner. " \
                      "The lexer was written for use with Racc, a Ruby " \
                      "variant of Yacc. But there is no code dependency on " \
                      "that project so the lexer may also be used on its " \
                      "own or with other packages."
  gem.homepage      = "https://github.com/borgsmidt/rlex"
  # Matches the LICENSE file shipped with the gem.
  gem.license       = "MIT"

  # NUL-separated output keeps file names containing whitespace intact.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,69 @@
1
require 'spec_helper'
require 'rlex/token'
require 'rlex/lexer'

# Specs for Rlex::Lexer#next_token. Constants are referenced through
# the Rlex namespace instead of mixing the module into Object.
describe Rlex::Lexer do
  # Builds the token an example expects the lexer to return.
  def token(type, value)
    Rlex::Token.new(type, value)
  end

  before :each do
    @lexer = Rlex::Lexer.new
  end

  describe "next_token" do
    it "should return EOF_TOKEN when the lexer has been assigned no input" do
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end

    it "should return EOF_TOKEN when there is no more input" do
      @lexer.start("")
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end

    it "should raise an error when remaining input is not matched by any rules" do
      @lexer.start("unmatched input")
      lambda { @lexer.next_token }.should raise_error(RuntimeError)
    end

    it "should return EOF_TOKEN when instructed to ignore everything" do
      @lexer.ignore(/.*/)
      @lexer.start("input to be ignored")
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end

    it "should return recognized keywords and finish with EOF_TOKEN" do
      @lexer.ignore(/\s+/)
      @lexer.keyword(:special)
      @lexer.start(" \t\nspecialspecial special ")
      special = token(:special, "special")
      @lexer.next_token.should eq(special)
      @lexer.next_token.should eq(special)
      @lexer.next_token.should eq(special)
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end

    it "should return tokens matched by regular rules and finish with EOF_TOKEN" do
      @lexer.ignore(/\s+/)
      @lexer.rule(:word, /\w+/)
      @lexer.start("sentence with four tokens")
      @lexer.next_token.should eq(token(:word, "sentence"))
      @lexer.next_token.should eq(token(:word, "with"))
      @lexer.next_token.should eq(token(:word, "four"))
      @lexer.next_token.should eq(token(:word, "tokens"))
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end

    it "should return all types of tokens and finish with EOF_TOKEN" do
      @lexer.ignore(/\s+/)
      @lexer.keyword(:if)
      @lexer.keyword(:lparen, "(")
      @lexer.keyword(:rparen, ")")
      @lexer.rule(:word, /\w+/)
      # "ifu" starts with the keyword "if" but must lex as one word,
      # because the lexer prefers the longest match.
      @lexer.start("ifu ( if ) ifu")
      @lexer.next_token.should eq(token(:word, "ifu"))
      @lexer.next_token.should eq(token(:lparen, "("))
      @lexer.next_token.should eq(token(:if, "if"))
      @lexer.next_token.should eq(token(:rparen, ")"))
      @lexer.next_token.should eq(token(:word, "ifu"))
      @lexer.next_token.should eq(Rlex::EOF_TOKEN)
    end
  end
end
@@ -0,0 +1,11 @@
1
# Generated by `rspec --init`. By convention every spec lives under
# a `spec` directory, which RSpec puts on the `$LOAD_PATH`. Load this
# file with `require "spec_helper.rb"` so that it is loaded only once.
#
# Reference: http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
#
# NOTE(review): the three settings are kept in their original order;
# the symbol-metadata switch presumably has to be set before
# `filter_run` receives a bare symbol - confirm before reordering.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run(:focus)
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rlex
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.5.0
6
+ platform: ruby
7
+ authors:
8
+ - Rasmus Borgsmidt
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-04-24 00:00:00 Z
14
+ dependencies: []
15
+
16
+ description: Implements a simple lexer using a StringScanner
17
+ email:
18
+ - rasmus@borgsmidt.dk
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - .gitignore
27
+ - .rspec
28
+ - Gemfile
29
+ - LICENSE
30
+ - README.md
31
+ - Rakefile
32
+ - lib/rlex.rb
33
+ - lib/rlex/lexer.rb
34
+ - lib/rlex/token.rb
35
+ - lib/rlex/version.rb
36
+ - rlex.gemspec
37
+ - spec/rlex/lexer_spec.rb
38
+ - spec/spec_helper.rb
39
+ homepage: https://github.com/borgsmidt/rlex
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options: []
44
+
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.11
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: The lexer was written for use with Racc, a Ruby variant of Yacc. But there is no code dependency on that project so the lexer may also be used on its own or with other packages.
66
+ test_files:
67
+ - spec/rlex/lexer_spec.rb
68
+ - spec/spec_helper.rb
69
+ has_rdoc: