rlex 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rlex.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Rasmus Borgsmidt
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # Rlex
2
+
3
+ Provides a simple lexer based on the `StringScanner` class.
4
+
5
+ The lexer was written for use with Racc, a Ruby variant of
6
+ Yacc. But there is no code dependency on that project so the
7
+ lexer may also be used on its own or with other packages.
8
+
9
+ * Ignored input takes precedence over rules and keywords, so if
10
+ a prefix is matched by an ignore pattern, it's ignored even if
11
+ it's also a keyword or matched by a rule
12
+ * The lexer is greedy, so if a prefix is matched by multiple
13
+ rules or keywords, the lexer chooses the option consuming the
14
+ most input
15
+
16
+ ## Documentation
17
+
18
+ Find out more about this project on [GitHub][gh_rlex],
19
+ [RubyGems][rg_rlex] and [RubyDoc][rd_rlex].
20
+
21
+ [gh_rlex]: https://github.com/borgsmidt/rlex
22
+ [rg_rlex]: http://rubygems.org/gems/rlex
23
+ [rd_rlex]: http://rubydoc.info/gems/rlex
24
+
25
+ ## Installation
26
+
27
+ Install the gem using:
28
+
29
+ $ gem install rlex
30
+
31
+ Alternatively, add this line to your application's Gemfile:
32
+
33
+ gem 'rlex'
34
+
35
+ Then execute:
36
+
37
+ $ bundle
38
+
39
+ ## Usage
40
+
41
+ ```ruby
42
+ require 'rlex/lexer' # not loaded by a plain require 'rlex'
43
+ lexer = Rlex::Lexer.new # define behavior
44
+ lexer.ignore /\s+/ # ignore whitespace
45
+ lexer.rule :word, /\w+/ # consider any text a 'word'
46
+ lexer.keyword :if # treat 'if' as a special keyword
47
+ lexer.keyword :lparen, "(" # any fixed input such as parentheses
48
+ lexer.keyword :rparen, ")" # may be defined as keywords
49
+
50
+ # Initialize with input
51
+ lexer.start "if ( foo ) bar" # initialize the lexer with a string
52
+
53
+ # Iterate through tokens
54
+ lexer.next_token # => Token (type = :if, value = 'if' )
55
+ lexer.next_token # => Token (type = :lparen, value = '(' )
56
+ lexer.next_token # => Token (type = :word, value = 'foo')
57
+ lexer.next_token # => Token (type = :rparen, value = ')' )
58
+ lexer.next_token # => Token (type = :word, value = 'bar')
59
+ lexer.next_token # => EOF_TOKEN
60
+ ```
61
+
62
+ ## Contributing
63
+
64
+ 1. Fork it on [GitHub][gh_rlex]
65
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
66
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
67
+ 4. Push to the branch (`git push origin my-new-feature`)
68
+ 5. Create new Pull Request
69
+
70
+ ## License
71
+
72
+ Written by Rasmus Borgsmidt &lt;<rasmus@borgsmidt.dk>&gt;
73
+
74
+ Released under the MIT license: <http://www.opensource.org/licenses/MIT>
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/lib/rlex.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "rlex/version"
2
+
3
+ # Implements a simple lexer using a StringScanner.
4
+ #
5
+ # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
6
+ #
7
+ module Rlex
8
+ end
data/lib/rlex/lexer.rb ADDED
@@ -0,0 +1,172 @@
1
+ require 'strscan'
2
+ require 'rlex/token'
3
+
4
+ module Rlex
5
+ # Implements a simple lexer using a <tt>StringScanner</tt>.
6
+ #
7
+ # The lexer was written for use with Racc, a Ruby variant of
8
+ # Yacc. But there is no code dependency on that project so the
9
+ # lexer may also be used on its own or with other packages.
10
+ #
11
+ # * Ignored input takes precedence over rules and keywords, so if
12
+ # a prefix is matched by an ignore pattern, it's ignored even if
13
+ # it's also a keyword or matched by a rule
14
+ # * The lexer is greedy, so if a prefix is matched by multiple
15
+ # rules or keywords, the lexer chooses the option consuming the
16
+ # most input
17
+ #
18
+ # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
19
+ #
20
+ # @example Basic usage
21
+ # # Define behavior
22
+ # lexer = Lexer.new
23
+ # lexer.ignore /\s+/ # ignore whitespace
24
+ # lexer.rule :word, /\w+/ # consider any text a 'word'
25
+ # lexer.keyword :if # treat 'if' as a special keyword
26
+ # lexer.keyword :lparen, "(" # any fixed input such as parentheses
27
+ # lexer.keyword :rparen, ")" # may be defined as keywords
28
+ #
29
+ # # Initialize with input
30
+ # lexer.start "if ( foo ) bar" # initialize the lexer with a string
31
+ #
32
+ # # Iterate through tokens
33
+ # lexer.next_token # => Token (type = :if, value = 'if')
34
+ # lexer.next_token # => Token (type = :lparen, value = '(')
35
+ # lexer.next_token # => Token (type = :word, value = 'foo')
36
+ # lexer.next_token # => Token (type = :rparen, value = ')')
37
+ # lexer.next_token # => Token (type = :word, value = 'bar')
38
+ # lexer.next_token # => EOF_TOKEN
39
+ #
40
+ class Lexer
41
+ # Initializes an empty Lexer.
42
+ #
43
+ def initialize
44
+ @ignored = []
45
+ @rules = []
46
+ @keywords = {}
47
+ end
48
+
49
+ # Instructs the lexer to ignore input matched by the specified
50
+ # pattern. If appropriate, call this multiple times to ignore
51
+ # several patterns.
52
+ #
53
+ # @note Ignored input takes precedence over rules and keywords,
54
+ # so if a prefix is matched by an ignore pattern, it's ignored
55
+ # even if it's also a keyword or matched by a rule
56
+ #
57
+ # @param [Regexp] pattern Pattern of input to ignore
58
+ #
59
+ # @return [Regexp] The specified pattern
60
+ #
61
+ def ignore(pattern)
62
+ @ignored << pattern
63
+ return pattern
64
+ end
65
+
66
+ # Defines a rule to match the specified pattern.
67
+ #
68
+ # @note Use keywords for efficiency instead of rules whenever
69
+ # the matched input is static
70
+ #
71
+ # @param [Symbol, #to_sym] name Unique name of rule
72
+ # @param [Regexp] pattern Pattern of input to match
73
+ #
74
+ # @note Name uniqueness is not validated yet (see the @todo in
75
+ # the method body); a duplicate name is currently accepted
76
+ #
77
+ # @return [Symbol] The name of the rule
78
+ #
79
+ def rule(name, pattern)
80
+ # @todo Validate the rule name
81
+ @rules << (Rule.new name.to_sym, pattern)
82
+ return name.to_sym
83
+ end
84
+
85
+ # Defines a static sequence of input as a keyword.
86
+ #
87
+ # @note Use keywords for efficiency instead of rules whenever
88
+ # the matched input is static
89
+ #
90
+ # @param [optional, Symbol, #to_sym] name Unique name of the
91
+ # keyword. If this argument is not given, the keyword is used
92
+ # to name itself
93
+ # @param [String, #to_s] kword Sequence of input to match as a
94
+ # keyword
95
+ #
96
+ # @note Name uniqueness is not validated yet (see the @todo in
97
+ # the method body); a duplicate name is currently accepted
98
+ #
99
+ # @return [Symbol] The name of the keyword
100
+ #
101
+ def keyword(name = nil, kword)
102
+ # @todo Validate the keyword name
103
+ name = kword if name == nil
104
+ pattern = Regexp.new(Regexp.escape kword.to_s)
105
+ rule name, pattern
106
+ @keywords[kword.to_s] = Token.new name.to_sym, kword.to_s
107
+ return name.to_sym
108
+ end
109
+
110
+ # Initializes the lexer with new input.
111
+ #
112
+ # @note This resets the lexer with a new StringScanner so any
113
+ # state information related to previous input is lost
114
+ #
115
+ # @param [String] input Input to scan for tokens
116
+ #
117
+ # @return [String] The specified input
118
+ #
119
+ def start(input)
120
+ @scanner = StringScanner.new input
121
+ return input
122
+ end
123
+
124
+ # Returns the next token matched from the remaining input. If no
125
+ # input is left, or the lexer has not been initialized,
126
+ # <tt>EOF_TOKEN</tt> is returned.
127
+ #
128
+ # @raise [RuntimeError] If there is any unmatched input
129
+ #
130
+ # @return [Token] Next token or <tt>EOF_TOKEN</tt>
131
+ #
132
+ def next_token
133
+ return EOF_TOKEN if @scanner.nil? or @scanner.empty?
134
+ return next_token if ignore_prefix?
135
+ rule = greediest_rule
136
+ if rule
137
+ prefix = @scanner.scan(rule.pattern)
138
+ keyword = @keywords[prefix]
139
+ return keyword ? keyword : Token.new(rule.name, prefix)
140
+ end
141
+ raise "unexpected input <#{@scanner.peek(5)}>"
142
+ end
143
+
144
+ private
145
+
146
+ # @private
147
+ Rule = Struct.new :name, :pattern
148
+
149
+ # @private
150
+ def ignore_prefix?
151
+ @ignored.each do |pattern|
152
+ prefix = @scanner.scan(pattern)
153
+ return true if prefix
154
+ end
155
+ return false
156
+ end
157
+
158
+ # @private
159
+ def greediest_rule
160
+ r = nil
161
+ len = 0
162
+ @rules.each do |rule|
163
+ prefix = @scanner.check(rule.pattern)
164
+ if prefix and prefix.length > len
165
+ r = rule
166
+ len = prefix.length
167
+ end
168
+ end
169
+ return r
170
+ end
171
+ end
172
+ end
data/lib/rlex/token.rb ADDED
@@ -0,0 +1,16 @@
1
+ module Rlex
2
+ # Simple class to represent tokens matched from the input.
3
+ #
4
+ # @author Rasmus Borgsmidt <rasmus@borgsmidt.dk>
5
+ #
6
+ # @attr_reader [Symbol] type Type of the token, such as the name of
7
+ # the rule used to match it
8
+ # @attr_reader [String] value Text matched from the input
9
+ #
10
+ Token = Struct.new :type, :value
11
+
12
+ # Special token used when the lexer has reached the end of the
13
+ # specified input.
14
+ #
15
+ EOF_TOKEN = Token.new :eof, ""
16
+ end
@@ -0,0 +1,4 @@
1
+ module Rlex
2
+ # Project version
3
+ VERSION = "0.5.0"
4
+ end
data/rlex.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/rlex/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Rasmus Borgsmidt"]
6
+ gem.email = ["rasmus@borgsmidt.dk"]
7
+ gem.description = %q{Implements a simple lexer using a StringScanner}
8
+ gem.summary = %q{The lexer was written for use with Racc, a
9
+ Ruby variant of Yacc. But there is no code
10
+ dependency on that project so the lexer may
11
+ also be used on its own or with other packages.}
12
+ gem.homepage = "https://github.com/borgsmidt/rlex"
13
+
14
+ gem.files = `git ls-files`.split($\)
15
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
+ gem.name = "rlex"
18
+ gem.require_paths = ["lib"]
19
+ gem.version = Rlex::VERSION
20
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+ require 'rlex/token'
3
+ require 'rlex/lexer'
4
+ include Rlex
5
+
6
+ describe Lexer do
7
+ before :each do
8
+ @lexer = Lexer.new
9
+ end
10
+
11
+ describe "next_token" do
12
+ it "should return EOF_TOKEN when the lexer has been assigned no input" do
13
+ @lexer.next_token.should eq EOF_TOKEN
14
+ end
15
+
16
+ it "should return EOF_TOKEN when there is no more input" do
17
+ @lexer.start ""
18
+ @lexer.next_token.should eq EOF_TOKEN
19
+ end
20
+
21
+ it "should raise an error when remaining input is not matched by any rules" do
22
+ @lexer.start "unmatched input"
23
+ lambda {@lexer.next_token}.should raise_error RuntimeError
24
+ end
25
+
26
+ it "should return EOF_TOKEN when instructed to ignore everything" do
27
+ @lexer.ignore /.*/
28
+ @lexer.start "input to be ignored"
29
+ @lexer.next_token.should eq EOF_TOKEN
30
+ end
31
+
32
+ it "should return recognized keywords and finish with EOF_TOKEN" do
33
+ @lexer.ignore /\s+/
34
+ @lexer.keyword :special
35
+ @lexer.start " \t\nspecialspecial special "
36
+ special = Token.new :special, "special"
37
+ @lexer.next_token.should eq special
38
+ @lexer.next_token.should eq special
39
+ @lexer.next_token.should eq special
40
+ @lexer.next_token.should eq EOF_TOKEN
41
+ end
42
+
43
+ it "should return tokens matched by regular rules and finish with EOF_TOKEN" do
44
+ @lexer.ignore /\s+/
45
+ @lexer.rule :word, /\w+/
46
+ @lexer.start "sentence with four tokens"
47
+ @lexer.next_token.should eq Token.new :word, "sentence"
48
+ @lexer.next_token.should eq Token.new :word, "with"
49
+ @lexer.next_token.should eq Token.new :word, "four"
50
+ @lexer.next_token.should eq Token.new :word, "tokens"
51
+ @lexer.next_token.should eq EOF_TOKEN
52
+ end
53
+
54
+ it "should return all types of tokens and finish with EOF_TOKEN" do
55
+ @lexer.ignore /\s+/
56
+ @lexer.keyword :if
57
+ @lexer.keyword :lparen, "("
58
+ @lexer.keyword :rparen, ")"
59
+ @lexer.rule :word, /\w+/
60
+ @lexer.start "ifu ( if ) ifu"
61
+ @lexer.next_token.should eq Token.new :word, "ifu"
62
+ @lexer.next_token.should eq Token.new :lparen, "("
63
+ @lexer.next_token.should eq Token.new :if, "if"
64
+ @lexer.next_token.should eq Token.new :rparen, ")"
65
+ @lexer.next_token.should eq Token.new :word, "ifu"
66
+ @lexer.next_token.should eq EOF_TOKEN
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,11 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper.rb"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+ RSpec.configure do |config|
8
+ config.treat_symbols_as_metadata_keys_with_true_values = true
9
+ config.run_all_when_everything_filtered = true
10
+ config.filter_run :focus
11
+ end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rlex
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.5.0
6
+ platform: ruby
7
+ authors:
8
+ - Rasmus Borgsmidt
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-04-24 00:00:00 Z
14
+ dependencies: []
15
+
16
+ description: Implements a simple lexer using a StringScanner
17
+ email:
18
+ - rasmus@borgsmidt.dk
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - .gitignore
27
+ - .rspec
28
+ - Gemfile
29
+ - LICENSE
30
+ - README.md
31
+ - Rakefile
32
+ - lib/rlex.rb
33
+ - lib/rlex/lexer.rb
34
+ - lib/rlex/token.rb
35
+ - lib/rlex/version.rb
36
+ - rlex.gemspec
37
+ - spec/rlex/lexer_spec.rb
38
+ - spec/spec_helper.rb
39
+ homepage: https://github.com/borgsmidt/rlex
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options: []
44
+
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.11
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: The lexer was written for use with Racc, a Ruby variant of Yacc. But there is no code dependency on that project so the lexer may also be used on its own or with other packages.
66
+ test_files:
67
+ - spec/rlex/lexer_spec.rb
68
+ - spec/spec_helper.rb
69
+ has_rdoc: