rly 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ measurement/report.txt
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format progress
3
+ -rspec_helper
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - rbx-19mode
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'rake', :group => :test
4
+ gem 'pry-nav', :group => :development
5
+
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Vladimir Pouzanov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,18 @@
1
+ [![Build Status](https://secure.travis-ci.org/farcaller/rly.png?branch=master)](http://travis-ci.org/farcaller/rly)
2
+
3
+ # Rly
4
+
5
+ Rly is a lexer and parser generator for ruby, based on ideas and solutions of
6
+ Python's [Ply](http://www.dabeaz.com/ply/).
7
+
8
+ ## Installation
9
+
10
+ Install via rubygems
11
+
12
+ gem install rly
13
+
14
+ ## Usage
15
+
16
+ You need to create lexer and parser classes for each grammar you want to process.
17
+ It is commonly done by subclassing {Rly::Lex} and {Rly::Parse} classes (check the
18
+ appropriate docs).
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ require "bundler/gem_tasks"
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new('spec')
6
+
7
+ # require 'yardstick/rake/verify'
8
+ # Yardstick::Rake::Verify.new do |verify|
9
+ # verify.threshold = 100
10
+ # end
11
+
12
+ # require 'yardstick/rake/measurement'
13
+ # Yardstick::Rake::Measurement.new(:yardstick_measure) do |measurement|
14
+ # measurement.output = 'measurement/report.txt'
15
+ # end
16
+
17
+ task :default => :spec
data/lib/rly.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "rly/version"
2
+ require "rly/lex"
3
+
4
+ module Rly
5
+ # Your code goes here...
6
+ end
data/lib/rly/lex.rb ADDED
@@ -0,0 +1,302 @@
1
+ require "rly/lex_token"
2
+
3
+ module Rly
4
+
5
+ # Exception raised on unhandled lexing errors.
6
+ class LexError < Exception; end
7
+
8
+ # Base class for your lexer.
9
+ #
10
+ # Generally, you define a new lexer by subclassing Rly::Lex. Your code should
11
+ # use methods {.token}, {.ignore}, {.literals}, {.on_error} to configure the
12
+ # lexer (check the methods' documentation for details).
13
+ #
14
+ # Once you have your lexer configured, you can create instances of it by passing a
15
+ # String to be tokenized. You can then use either {#each} method or common
16
+ # *Enumerable* methods to get the processed tokens.
17
+ class Lex
18
+ include Enumerable
19
+
20
+ # Tracks the current line number for generated tokens
21
+ #
22
+ # *lineno*'s value should be increased manually. Check the example for a demo
23
+ # rule.
24
+ #
25
+ # @api semipublic
26
+ # @return [Fixnum] current line number
27
+ #
28
+ # @example
29
+ # token /\n+/ do |t| t.lexer.lineno += t.value.count("\n"); t end
30
+ attr_accessor :lineno
31
+
32
+ # Tracks the current position in the input string
33
+ #
34
+ # Generally, it should only be used to skip a few characters in the error handler.
35
+ #
36
+ # @api semipublic
37
+ # @return [Fixnum] index of a starting character for current token
38
+ #
39
+ # @example
40
+ # on_error do |t|
41
+ # t.lexer.pos += 1
42
+ # nil # skip the bad character
43
+ # end
44
+ attr_accessor :pos
45
+
46
+ # Creates a new lexer instance for given input
47
+ #
48
+ # @api public
49
+ # @param input [String] a string to be tokenized
50
+ # @example
51
+ # class MyLexer < Rly::Lex
52
+ # ignore " "
53
+ # token :LOWERS, /[a-z]+/
54
+ # token :UPPERS, /[A-Z]+/
55
+ # end
56
+ #
57
+ # lex = MyLexer.new("hello WORLD")
58
+ # lex.each do |tok|
59
+ # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
60
+ # #=> "UPPERS -> WORLD"
61
+ # end
62
+ def initialize(input="")
63
+ @input = input
64
+ @pos = 0
65
+ @lineno = 0
66
+ end
67
+
68
+ # Processes the next token in input
69
+ #
70
+ # This is the main interface to lexer. If block is given, {#each} behaves like
71
+ # a usual enumerator, yielding the next token. If there is no block, {#each}
72
+ # returns an Enumerator object.
73
+ #
74
+ # {#each} raises {LexError} if the input cannot be processed. This happens if
75
+ # there were no matches by 'token' rules and no matches by 'literals' rule.
76
+ # If the {.on_error} handler is not set, the exception will be raised immediately,
77
+ # however, if the handler is set, the exception will be raised only if the {#pos}
78
+ # after returning from error handler is still unchanged.
79
+ #
80
+ # @api public
81
+ # @yieldparam tok [LexToken] next processed token
82
+ # @raise [LexError] if the input cannot be processed
83
+ # @return [Enumerator] if block is not given
84
+ # @return [nil] if block is given
85
+ #
86
+ # @example
87
+ # lex = MyLexer.new("hello WORLD")
88
+ #
89
+ # lex.each #=> #<Enumerator: ...>
90
+ #
91
+ # lex.each do |tok|
92
+ # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
93
+ # #=> "UPPERS -> WORLD"
94
+ # end
95
+ def each
96
+ return self.to_enum unless block_given?
97
+
98
+ while @pos < @input.length
99
+ if self.class.ignores_list[@input[@pos]]
100
+ @pos += 1
101
+ next
102
+ end
103
+
104
+ matched = false
105
+ self.class.tokens.each do |type, rule, block|
106
+ m = rule.match(@input, @pos)
107
+ next unless m
108
+
109
+ tok = LexToken.new(type, m[0], self)
110
+
111
+ matched = true
112
+
113
+ tok = block.call(tok) if block
114
+ yield tok if tok.type
115
+
116
+ @pos = m.end(0)
117
+ end
118
+
119
+ unless matched
120
+ if self.class.literals_list[@input[@pos]]
121
+ tok = LexToken.new(@input[@pos], @input[@pos], self)
122
+
123
+ matched = true
124
+ yield tok
125
+ @pos += 1
126
+ end
127
+ end
128
+
129
+ unless matched
130
+ if self.class.error_hander
131
+ pos = @pos
132
+ tok = LexToken.new(:error, @input[@pos], self)
133
+ tok = self.class.error_hander.call(tok)
134
+ if pos == @pos
135
+ raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
136
+ else
137
+ yield tok if tok && tok.type
138
+ end
139
+ else
140
+ raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
141
+ end
142
+ end
143
+ end
144
+ end
145
+
146
+ class << self
147
+ # Returns the list of registered tokens
148
+ #
149
+ # @api private
150
+ # @visibility protected
151
+ # @return [Array] array of [type, regex, block] triples
152
+ def tokens
153
+ @tokens ||= []
154
+ end
155
+
156
+ # Returns the list of registered literals
157
+ #
158
+ # @api private
159
+ # @visibility protected
160
+ # @return [String] registered literals
161
+ def literals_list
162
+ @literals ||= ""
163
+ end
164
+
165
+ # Returns the list of registered ignorables
166
+ #
167
+ # @api private
168
+ # @visibility protected
169
+ # @return [String] registered ignorables
170
+ def ignores_list
171
+ @ignores ||= ""
172
+ end
173
+
174
+ # Returns the registered error handler, if any
175
+ #
176
+ # @api private
177
+ # @visibility protected
178
+ # @return [Proc] registered error handler
179
+ def error_hander
180
+ @error_block
181
+ end
182
+
183
+ private
184
+ # @!group DSL Class Methods
185
+ # Adds a token definition to a class
186
+ #
187
+ # This method adds a token definition to be later used to tokenize input.
188
+ # It can be used to register normal tokens, and also functional tokens (the
189
+ # latter ones are processed as usual but are not being returned).
190
+ #
191
+ # @!visibility public
192
+ # @api public
193
+ # @param type [Symbol] token type. It should be an all-caps symbol by convention
194
+ # @param regex [Regexp] a regular expression to match the token
195
+ #
196
+ # @yieldparam tok [LexToken] a new token instance for processed input
197
+ # @yieldreturn [LexToken] the same or modified token instance. Return nil
198
+ # to ignore the input
199
+ # @see .literals
200
+ # @see .ignore
201
+ # @example
202
+ # class MyLexer < Rly::Lex
203
+ # token :LOWERS, /[a-z]+/ # this would match LOWERS on 1+ lowercase letters
204
+ #
205
+ # token :INT, /\d+/ do |t| # this would match on integers
206
+ # t.value = t.value.to_i # additionally the value is converted to Fixnum
207
+ # t # the updated token is returned
208
+ # end
209
+ #
210
+ # token /\n/ do |t| # this would match on newlines
211
+ # t.lexer.lineno += 1 # the block will be executed on match, but
212
+ # end # no token will be returned (as name is not specified)
213
+ #
214
+ # end
215
+ def token(*args, &block)
216
+ if args.length == 2
217
+ self.tokens << [args[0], args[1], block]
218
+ elsif args.length == 1
219
+ self.tokens << [nil, args[0], block]
220
+ else
221
+ raise ArgumentError
222
+ end
223
+ end
224
+
225
+ # Specifies a list of one-char literals
226
+ #
227
+ # Literals may be used in the case when you have several one-character tokens
228
+ # and you don't want to define them one by one using {.token} method.
229
+ #
230
+ # @!visibility public
231
+ # @api public
232
+ # @param lit [String] the list of literals
233
+ # @see .token
234
+ # @example
235
+ # class MyLexer < Rly::Lex
236
+ # literals "+-/*"
237
+ # end
238
+ #
239
+ # lex = MyLexer.new("+-")
240
+ # lex.each do |tok|
241
+ # puts "#{tok.type} -> #{tok.value}" #=> "+ -> +"
242
+ # #=> "- -> -"
243
+ # end
244
+ def literals(lit)
245
+ @literals = lit
246
+ end
247
+
248
+ # Specifies a list of one-char symbols to be ignored in input
249
+ #
250
+ # This method allows skipping over formatting symbols (like tabs and spaces) quickly.
251
+ #
252
+ # @!visibility public
253
+ # @api public
254
+ # @param ign [String] the list of ignored symbols
255
+ # @see .token
256
+ # @example
257
+ # class MyLexer < Rly::Lex
258
+ # literals "+-"
259
+ # token :INT, /\d+/
260
+ # ignore " \t"
261
+ # end
262
+ #
263
+ # lex = MyLexer.new("2 + 2")
264
+ # lex.each do |tok|
265
+ # puts "#{tok.type} -> #{tok.value}" #=> "INT -> 2"
266
+ # #=> "+ -> +"
267
+ # #=> "INT -> 2"
268
+ # end
269
+ def ignore(ign)
270
+ @ignores = ign
271
+ end
272
+
273
+ # Specifies a block that should be called on error
274
+ #
275
+ # In case of lexing error the lexer first tries to fix it by giving the
275
+ # developer a chance to look at the failing character. If this block is
277
+ # not provided, the lexing error always results in {LexError}.
278
+ #
279
+ # You must increment the lexer's {#pos} as part of the action. You may also
280
+ # return a new {LexToken} or nil to skip the input
281
+ #
282
+ # @!visibility public
283
+ # @api public
284
+ # @see .token
285
+ # @example
286
+ # class MyLexer < Rly::Lex
287
+ # token :INT, /\d+/
288
+ # on_error do |tok|
289
+ # tok.lexer.pos += 1 # just skip the offending character
290
+ # end
291
+ # end
292
+ #
293
+ # lex = MyLexer.new("123qwe")
294
+ # lex.each do |tok|
295
+ # puts "#{tok.type} -> #{tok.value}" #=> "INT -> 123"
296
+ # end
297
+ def on_error(&block)
298
+ @error_block = block
299
+ end
300
+ end
301
+ end
302
+ end
@@ -0,0 +1,13 @@
1
+ module Rly
2
+
3
+ class LexToken
4
+ attr_accessor :value
5
+ attr_reader :type, :lexer
6
+
7
+ def initialize(type, value, lexer)
8
+ @type = type
9
+ @value = value
10
+ @lexer = lexer
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,3 @@
1
+ module Rly
2
+ VERSION = "0.1.0"
3
+ end
data/rly.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rly/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "rly"
8
+ gem.version = Rly::VERSION
9
+ gem.authors = ["Vladimir Pouzanov"]
10
+ gem.email = ["farcaller@gmail.com"]
11
+ gem.description = "A simple ruby implementation of lex and yacc, based on Python's ply"
12
+ gem.summary = "A simple ruby implementation of lex and yacc, based on Python's ply"
13
+ gem.homepage = ""
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_development_dependency 'rspec'
21
+ end
@@ -0,0 +1,144 @@
1
+ require "rly"
2
+
3
+ describe Rly::Lex do
4
+ context "Simple Lexer" do
5
+ testLexer = Class.new(Rly::Lex) do
6
+ token :FIRST, /[a-z]+/
7
+ token :SECOND, /[A-Z]+/
8
+ end
9
+
10
+ it "should have a list of defined tokens" do
11
+ testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
12
+ end
13
+
14
+ it "should output tokens one by one" do
15
+ test = 'qweASDzxc'
16
+ l = testLexer.new(test).to_enum
17
+
18
+ tok = l.next
19
+ tok.type.should == :FIRST
20
+ tok.value.should == 'qwe'
21
+
22
+ tok = l.next
23
+ tok.type.should == :SECOND
24
+ tok.value.should == 'ASD'
25
+
26
+ tok = l.next
27
+ tok.type.should == :FIRST
28
+ tok.value.should == 'zxc'
29
+
30
+ expect { l.next } .to raise_error(StopIteration)
31
+ end
32
+ end
33
+
34
+ context "Literals Lexer" do
35
+ testLexer = Class.new(Rly::Lex) do
36
+ literals "+-*/"
37
+ end
38
+
39
+ it "should output literal tokens" do
40
+ test = '++--'
41
+ l = testLexer.new(test).to_enum
42
+
43
+ l.next.value.should == '+'
44
+ l.next.value.should == '+'
45
+ l.next.value.should == '-'
46
+ l.next.value.should == '-'
47
+ end
48
+ end
49
+
50
+ context "Ignores Lexer" do
51
+ testLexer = Class.new(Rly::Lex) do
52
+ ignore " \t"
53
+ end
54
+
55
+ it "should honour ignores list" do
56
+ test = " \t\t \t \t"
57
+ l = testLexer.new(test).to_enum
58
+
59
+ expect { l.next } .to raise_error(StopIteration)
60
+ end
61
+ end
62
+
63
+ context "Block-based Token Lexer" do
64
+ testLexer = Class.new(Rly::Lex) do
65
+ token :TEST, /\d+/ do |t|
66
+ t.value = t.value.to_i
67
+ t
68
+ end
69
+ end
70
+
71
+ it "calls a block to further process a token" do
72
+ test = "42"
73
+ l = testLexer.new(test).to_enum
74
+
75
+ l.next.value == 42
76
+ end
77
+ end
78
+
79
+ context "Non-outputtable tokens Lexer" do
80
+ testLexer = Class.new(Rly::Lex) do
81
+ token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
82
+ end
83
+
84
+ it "process but don't output tokens without a name" do
85
+ test = "\n\n\n"
86
+ l = testLexer.new(test)
87
+
88
+ expect { l.to_enum.next } .to raise_error(StopIteration)
89
+
90
+ l.lineno.should == 3
91
+ end
92
+ end
93
+
94
+ context "Error handling" do
95
+ it "raises an error, if there are no suitable tokens" do
96
+ testLexer = Class.new(Rly::Lex) do
97
+ token :NUM, /\d+/
98
+ end
99
+ l = testLexer.new("test")
100
+
101
+ expect { l.to_enum.next } .to raise_error(Rly::LexError)
102
+ end
103
+
104
+ it "raises an error, if there is no possible tokens defined" do
105
+ testLexer = Class.new(Rly::Lex) do ; end
106
+ l = testLexer.new("test")
107
+
108
+ expect { l.to_enum.next } .to raise_error(Rly::LexError)
109
+ end
110
+
111
+ it "calls an error function if it is available, which returns a fixed token" do
112
+ testLexer = Class.new(Rly::Lex) do
113
+ token :NUM, /\d+/
114
+ on_error do |t|
115
+ t.value = "BAD #{t.value}"
116
+ t.lexer.pos += 1
117
+ t
118
+ end
119
+ end
120
+ l = testLexer.new("test")
121
+
122
+ tok = l.to_enum.next
123
+ tok.value.should == "BAD t"
124
+ tok.type.should == :error
125
+
126
+ tok = l.to_enum.next
127
+ tok.value.should == "BAD e"
128
+ tok.type.should == :error
129
+ end
130
+
131
+ it "calls an error function if it is available, which can skip a token" do
132
+ testLexer = Class.new(Rly::Lex) do
133
+ token :NUM, /\d+/
134
+ on_error do |t|
135
+ t.lexer.pos += 1
136
+ nil
137
+ end
138
+ end
139
+ l = testLexer.new("test1")
140
+
141
+ l.to_enum.next.value.should == '1'
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,14 @@
1
+ require "rubygems"
2
+ require "bundler/setup"
3
+
4
+ RSpec.configure do |config|
5
+ config.treat_symbols_as_metadata_keys_with_true_values = true
6
+ config.run_all_when_everything_filtered = true
7
+ config.filter_run :focus
8
+
9
+ # Run specs in random order to surface order dependencies. If you find an
10
+ # order dependency and want to debug it, you can fix the order by providing
11
+ # the seed, which is printed after each run.
12
+ # --seed 1234
13
+ config.order = 'random'
14
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rly
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Vladimir Pouzanov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: A simple ruby implementation of lex and yacc, based on Python's ply
31
+ email:
32
+ - farcaller@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - .rspec
39
+ - .travis.yml
40
+ - Gemfile
41
+ - LICENSE.txt
42
+ - README.md
43
+ - Rakefile
44
+ - lib/rly.rb
45
+ - lib/rly/lex.rb
46
+ - lib/rly/lex_token.rb
47
+ - lib/rly/version.rb
48
+ - rly.gemspec
49
+ - spec/lex/lexer_spec.rb
50
+ - spec/spec_helper.rb
51
+ homepage: ''
52
+ licenses: []
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 1.8.21
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: A simple ruby implementation of lex and yacc, based on Python's ply
75
+ test_files:
76
+ - spec/lex/lexer_spec.rb
77
+ - spec/spec_helper.rb
78
+ has_rdoc: