rly 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
+ measurement/report.txt
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --color
+ --format progress
+ -rspec_helper
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
+ language: ruby
+ rvm:
+ - 1.9.3
+ - rbx-19mode
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source 'https://rubygems.org'
+
+ gem 'rake', :group => :test
+ gem 'pry-nav', :group => :development
+
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2012 Vladimir Pouzanov
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,18 @@
+ [![Build Status](https://secure.travis-ci.org/farcaller/rly.png?branch=master)](http://travis-ci.org/farcaller/rly)
+
+ # Rly
+
+ Rly is a lexer and parser generator for Ruby, based on the ideas and solutions of
+ Python's [Ply](http://www.dabeaz.com/ply/).
+
+ ## Installation
+
+ Install via RubyGems:
+
+     gem install rly
+
+ ## Usage
+
+ You need to create lexer and parser classes for each grammar you want to process.
+ This is commonly done by subclassing the {Rly::Lex} and {Rly::Parse} classes (check the
+ appropriate docs).
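
The usage notes above are terse, so here is a short sketch of how the pieces fit together. It is not part of the packaged files: the class and token names (`WordsAndNumbersLexer`, `LOWERS`, `NUMBER`) are made up for illustration, and the DSL calls are taken from the `@example` blocks in `lib/rly/lex.rb` and the specs further down. Note that only the lexer half ships in 0.1.0; `Rly::Parse` is mentioned in the README but is not among the packaged files.

```ruby
require 'rly'

# Illustrative lexer built with the class-level DSL documented in lib/rly/lex.rb.
class WordsAndNumbersLexer < Rly::Lex
  ignore " \t"                 # whitespace is skipped entirely

  token :LOWERS, /[a-z]+/      # plain token: yielded exactly as matched

  token :NUMBER, /\d+/ do |t|  # token with a post-processing block
    t.value = t.value.to_i     # convert the matched text to an Integer
    t                          # return the (modified) token to emit it
  end
end

WordsAndNumbersLexer.new("hello 42 world").each do |tok|
  puts "#{tok.type} -> #{tok.value}"
end
# Prints: LOWERS -> hello, NUMBER -> 42, LOWERS -> world
```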
data/Rakefile ADDED
@@ -0,0 +1,17 @@
+ require "bundler/gem_tasks"
+ Bundler::GemHelper.install_tasks
+
+ require 'rspec/core/rake_task'
+ RSpec::Core::RakeTask.new('spec')
+
+ # require 'yardstick/rake/verify'
+ # Yardstick::Rake::Verify.new do |verify|
+ #   verify.threshold = 100
+ # end
+
+ # require 'yardstick/rake/measurement'
+ # Yardstick::Rake::Measurement.new(:yardstick_measure) do |measurement|
+ #   measurement.output = 'measurement/report.txt'
+ # end
+
+ task :default => :spec
data/lib/rly.rb ADDED
@@ -0,0 +1,6 @@
+ require "rly/version"
+ require "rly/lex"
+
+ module Rly
+   # Your code goes here...
+ end
data/lib/rly/lex.rb ADDED
@@ -0,0 +1,302 @@
+ require "rly/lex_token"
+
+ module Rly
+
+   # Exception raised on unhandled lexing errors.
+   class LexError < Exception; end
+
+   # Base class for your lexer.
+   #
+   # Generally, you define a new lexer by subclassing Rly::Lex. Your code should
+   # use the {.token}, {.ignore}, {.literals} and {.on_error} methods to configure
+   # the lexer (check the methods' documentation for details).
+   #
+   # Once your lexer is configured, you can create its instances, passing a
+   # String to be tokenized. You can then use either the {#each} method or the
+   # common *Enumerable* methods to get the processed tokens.
+   class Lex
+     include Enumerable
+
+     # Tracks the current line number for generated tokens
+     #
+     # *lineno*'s value should be increased manually. Check the example for a demo
+     # rule.
+     #
+     # @api semipublic
+     # @return [Fixnum] current line number
+     #
+     # @example
+     #   token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
+     attr_accessor :lineno
+
+     # Tracks the current position in the input string
+     #
+     # Generally, it should only be used to skip a few characters in the error handler.
+     #
+     # @api semipublic
+     # @return [Fixnum] index of the starting character of the current token
+     #
+     # @example
+     #   on_error do |t|
+     #     t.lexer.pos += 1
+     #     nil # skip the bad character
+     #   end
+     attr_accessor :pos
+
+     # Creates a new lexer instance for the given input
+     #
+     # @api public
+     # @param input [String] a string to be tokenized
+     # @example
+     #   class MyLexer < Rly::Lex
+     #     ignore " "
+     #     token :LOWERS, /[a-z]+/
+     #     token :UPPERS, /[A-Z]+/
+     #   end
+     #
+     #   lex = MyLexer.new("hello WORLD")
+     #   lex.each do |tok|
+     #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+     #                                        #=> "UPPERS -> WORLD"
+     #   end
+     def initialize(input="")
+       @input = input
+       @pos = 0
+       @lineno = 0
+     end
+
+     # Processes the next token in the input
+     #
+     # This is the main interface to the lexer. If a block is given, {#each} behaves
+     # like a normal enumerator, yielding the next token. If there is no block,
+     # {#each} returns an Enumerator object.
+     #
+     # {#each} raises {LexError} if the input cannot be processed. This happens when
+     # neither the 'token' rules nor the 'literals' list produce a match.
+     # If the {.on_error} handler is not set, the exception is raised immediately;
+     # if the handler is set, the exception is raised only if {#pos} is still
+     # unchanged after the error handler returns.
+     #
+     # @api public
+     # @yieldparam tok [LexToken] next processed token
+     # @raise [LexError] if the input cannot be processed
+     # @return [Enumerator] if block is not given
+     # @return [nil] if block is given
+     #
+     # @example
+     #   lex = MyLexer.new("hello WORLD")
+     #
+     #   lex.each #=> #<Enumerator: ...>
+     #
+     #   lex.each do |tok|
+     #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+     #                                        #=> "UPPERS -> WORLD"
+     #   end
+     def each
+       return self.to_enum unless block_given?
+
+       while @pos < @input.length
+         if self.class.ignores_list[@input[@pos]]
+           @pos += 1
+           next
+         end
+
+         matched = false
+         self.class.tokens.each do |type, rule, block|
+           m = rule.match(@input, @pos)
+           next unless m
+
+           tok = LexToken.new(type, m[0], self)
+
+           matched = true
+
+           tok = block.call(tok) if block
+           yield tok if tok.type
+
+           @pos = m.end(0)
+         end
+
+         unless matched
+           if self.class.literals_list[@input[@pos]]
+             tok = LexToken.new(@input[@pos], @input[@pos], self)
+
+             matched = true
+             yield tok
+             @pos += 1
+           end
+         end
+
+         unless matched
+           if self.class.error_hander
+             pos = @pos
+             tok = LexToken.new(:error, @input[@pos], self)
+             tok = self.class.error_hander.call(tok)
+             if pos == @pos
+               raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
+             else
+               yield tok if tok && tok.type
+             end
+           else
+             raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
+           end
+         end
+       end
+     end
+
+     class << self
+       # Returns the list of registered tokens
+       #
+       # @api private
+       # @visibility protected
+       # @return [Array] array of [type, regex, block] triples
+       def tokens
+         @tokens ||= []
+       end
+
+       # Returns the list of registered literals
+       #
+       # @api private
+       # @visibility protected
+       # @return [String] registered literals
+       def literals_list
+         @literals ||= ""
+       end
+
+       # Returns the list of registered ignorables
+       #
+       # @api private
+       # @visibility protected
+       # @return [String] registered ignorables
+       def ignores_list
+         @ignores ||= ""
+       end
+
+       # Returns the registered error handler, if any
+       #
+       # @api private
+       # @visibility protected
+       # @return [Proc] registered error handler
+       def error_hander
+         @error_block
+       end
+
+       private
+       # @!group DSL Class Methods
+       # Adds a token definition to a class
+       #
+       # This method adds a token definition to be later used to tokenize input.
+       # It can be used to register normal tokens, and also functional tokens (the
+       # latter are processed as usual but are not returned).
+       #
+       # @!visibility public
+       # @api public
+       # @param type [Symbol] token type. It should be an all-caps symbol by convention
+       # @param regex [Regexp] a regular expression to match the token
+       #
+       # @yieldparam tok [LexToken] a new token instance for processed input
+       # @yieldreturn [LexToken] the same or modified token instance. Return nil
+       #   to ignore the input
+       # @see .literals
+       # @see .ignores
+       # @example
+       #   class MyLexer < Rly::Lex
+       #     token :LOWERS, /[a-z]+/ # this would match LOWERS on 1+ lowercase letters
+
+       #     token :INT, /\d+/ do |t| # this would match on integers
+       #       t.value = t.value.to_i # additionally the value is converted to Fixnum
+       #       t                      # the updated token is returned
+       #     end
+
+       #     token /\n/ do |t|     # this would match on newlines
+       #       t.lexer.lineno += 1 # the block will be executed on match, but
+       #     end                   # no token will be returned (as name is not specified)
+
+       #   end
+       def token(*args, &block)
+         if args.length == 2
+           self.tokens << [args[0], args[1], block]
+         elsif args.length == 1
+           self.tokens << [nil, args[0], block]
+         else
+           raise ArgumentError
+         end
+       end
+
+       # Specifies a list of one-char literals
+       #
+       # Literals may be used when you have several one-character tokens
+       # and you don't want to define them one by one using the {.token} method.
+       #
+       # @!visibility public
+       # @api public
+       # @param lit [String] the list of literals
+       # @see .token
+       # @example
+       #   class MyLexer < Rly::Lex
+       #     literals "+-/*"
+       #   end
+       #
+       #   lex = MyLexer.new("+-")
+       #   lex.each do |tok|
+       #     puts "#{tok.type} -> #{tok.value}" #=> "+ -> +"
+       #                                        #=> "- -> -"
+       #   end
+       def literals(lit)
+         @literals = lit
+       end
+
+       # Specifies a list of one-char symbols to be ignored in input
+       #
+       # This method allows you to skip over formatting symbols (like tabs and spaces) quickly.
+       #
+       # @!visibility public
+       # @api public
+       # @param ign [String] the list of ignored symbols
+       # @see .token
+       # @example
+       #   class MyLexer < Rly::Lex
+       #     literals "+-"
+       #     token :INT, /\d+/
+       #     ignore " \t"
+       #   end
+       #
+       #   lex = MyLexer.new("2 + 2")
+       #   lex.each do |tok|
+       #     puts "#{tok.type} -> #{tok.value}" #=> "INT -> 2"
+       #                                        #=> "+ -> +"
+       #                                        #=> "INT -> 2"
+       #   end
+       def ignore(ign)
+         @ignores = ign
+       end
+
+       # Specifies a block that should be called on error
+       #
+       # In case of a lexing error, the lexer first tries to recover by giving the
+       # developer a chance to look at the failing character. If this block is
+       # not provided, a lexing error always results in {LexError}.
+       #
+       # You must increment the lexer's {#pos} as part of the action. You may also
+       # return a new {LexToken}, or nil to skip the input.
+       #
+       # @!visibility public
+       # @api public
+       # @see .token
+       # @example
+       #   class MyLexer < Rly::Lex
+       #     token :INT, /\d+/
+       #     on_error do |tok|
+       #       tok.lexer.pos += 1 # just skip the offending character
+       #     end
+       #   end
+       #
+       #   lex = MyLexer.new("123qwe")
+       #   lex.each do |tok|
+       #     puts "#{tok.type} -> #{tok.value}" #=> "INT -> 123"
+       #   end
+       def on_error(&block)
+         @error_block = block
+       end
+     end
+   end
+ end
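
The `on_error` contract documented above (the handler must advance `#pos` itself, and may return a replacement token or `nil` to drop the input) is easiest to see end-to-end. The sketch below is not part of the package; the `ForgivingLexer`/`WORD` names are made up, and the behaviour mirrors the error-handling specs in `spec/lex/lexer_spec.rb` further down.

```ruby
require 'rly'

# Hypothetical lexer that skips characters it cannot tokenize instead of
# letting Rly::LexError propagate.
class ForgivingLexer < Rly::Lex
  token :WORD, /[a-z]+/

  on_error do |t|
    t.lexer.pos += 1  # mandatory: if pos is unchanged, LexError is raised anyway
    nil               # returning nil drops the offending character
  end
end

ForgivingLexer.new("abc?!").map(&:value)   # => ["abc"]

# Without the on_error block the same input raises:
#   Rly::LexError: Illegal character '?' at index 3
```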
data/lib/rly/lex_token.rb ADDED
@@ -0,0 +1,13 @@
+ module Rly
+
+   class LexToken
+     attr_accessor :value
+     attr_reader :type, :lexer
+
+     def initialize(type, value, lexer)
+       @type = type
+       @value = value
+       @lexer = lexer
+     end
+   end
+ end
data/lib/rly/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Rly
+   VERSION = "0.1.0"
+ end
data/rly.gemspec ADDED
@@ -0,0 +1,21 @@
+ # -*- encoding: utf-8 -*-
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'rly/version'
+
+ Gem::Specification.new do |gem|
+   gem.name = "rly"
+   gem.version = Rly::VERSION
+   gem.authors = ["Vladimir Pouzanov"]
+   gem.email = ["farcaller@gmail.com"]
+   gem.description = "A simple ruby implementation of lex and yacc, based on Python's ply"
+   gem.summary = "A simple ruby implementation of lex and yacc, based on Python's ply"
+   gem.homepage = ""
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_development_dependency 'rspec'
+ end
data/spec/lex/lexer_spec.rb ADDED
@@ -0,0 +1,144 @@
+ require "rly"
+
+ describe Rly::Lex do
+   context "Simple Lexer" do
+     testLexer = Class.new(Rly::Lex) do
+       token :FIRST, /[a-z]+/
+       token :SECOND, /[A-Z]+/
+     end
+
+     it "should have a list of defined tokens" do
+       testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
+     end
+
+     it "should output tokens one by one" do
+       test = 'qweASDzxc'
+       l = testLexer.new(test).to_enum
+
+       tok = l.next
+       tok.type.should == :FIRST
+       tok.value.should == 'qwe'
+
+       tok = l.next
+       tok.type.should == :SECOND
+       tok.value.should == 'ASD'
+
+       tok = l.next
+       tok.type.should == :FIRST
+       tok.value.should == 'zxc'
+
+       expect { l.next } .to raise_error(StopIteration)
+     end
+   end
+
+   context "Literals Lexer" do
+     testLexer = Class.new(Rly::Lex) do
+       literals "+-*/"
+     end
+
+     it "should output literal tokens" do
+       test = '++--'
+       l = testLexer.new(test).to_enum
+
+       l.next.value.should == '+'
+       l.next.value.should == '+'
+       l.next.value.should == '-'
+       l.next.value.should == '-'
+     end
+   end
+
+   context "Ignores Lexer" do
+     testLexer = Class.new(Rly::Lex) do
+       ignore " \t"
+     end
+
+     it "should honour ignores list" do
+       test = " \t\t \t    \t"
+       l = testLexer.new(test).to_enum
+
+       expect { l.next } .to raise_error(StopIteration)
+     end
+   end
+
+   context "Block-based Token Lexer" do
+     testLexer = Class.new(Rly::Lex) do
+       token :TEST, /\d+/ do |t|
+         t.value = t.value.to_i
+         t
+       end
+     end
+
+     it "calls a block to further process a token" do
+       test = "42"
+       l = testLexer.new(test).to_enum
+
+       l.next.value.should == 42
+     end
+   end
+
+   context "Non-outputtable tokens Lexer" do
+     testLexer = Class.new(Rly::Lex) do
+       token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
+     end
+
+     it "processes but doesn't output tokens without a name" do
+       test = "\n\n\n"
+       l = testLexer.new(test)
+
+       expect { l.to_enum.next } .to raise_error(StopIteration)
+
+       l.lineno.should == 3
+     end
+   end
+
+   context "Error handling" do
+     it "raises an error, if there are no suitable tokens" do
+       testLexer = Class.new(Rly::Lex) do
+         token :NUM, /\d+/
+       end
+       l = testLexer.new("test")
+
+       expect { l.to_enum.next } .to raise_error(Rly::LexError)
+     end
+
+     it "raises an error, if there are no possible tokens defined" do
+       testLexer = Class.new(Rly::Lex) do ; end
+       l = testLexer.new("test")
+
+       expect { l.to_enum.next } .to raise_error(Rly::LexError)
+     end
+
+     it "calls an error function if it is available, which returns a fixed token" do
+       testLexer = Class.new(Rly::Lex) do
+         token :NUM, /\d+/
+         on_error do |t|
+           t.value = "BAD #{t.value}"
+           t.lexer.pos += 1
+           t
+         end
+       end
+       l = testLexer.new("test")
+
+       tok = l.to_enum.next
+       tok.value.should == "BAD t"
+       tok.type.should == :error
+
+       tok = l.to_enum.next
+       tok.value.should == "BAD e"
+       tok.type.should == :error
+     end
+
+     it "calls an error function if it is available, which can skip a token" do
+       testLexer = Class.new(Rly::Lex) do
+         token :NUM, /\d+/
+         on_error do |t|
+           t.lexer.pos += 1
+           nil
+         end
+       end
+       l = testLexer.new("test1")
+
+       l.to_enum.next.value.should == '1'
+     end
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,14 @@
+ require "rubygems"
+ require "bundler/setup"
+
+ RSpec.configure do |config|
+   config.treat_symbols_as_metadata_keys_with_true_values = true
+   config.run_all_when_everything_filtered = true
+   config.filter_run :focus
+
+   # Run specs in random order to surface order dependencies. If you find an
+   # order dependency and want to debug it, you can fix the order by providing
+   # the seed, which is printed after each run.
+   #     --seed 1234
+   config.order = 'random'
+ end
metadata ADDED
@@ -0,0 +1,78 @@
+ --- !ruby/object:Gem::Specification
+ name: rly
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+   prerelease:
+ platform: ruby
+ authors:
+ - Vladimir Pouzanov
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2012-11-09 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: A simple ruby implementation of lex and yacc, based on Python's ply
+ email:
+ - farcaller@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - .rspec
+ - .travis.yml
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - lib/rly.rb
+ - lib/rly/lex.rb
+ - lib/rly/lex_token.rb
+ - lib/rly/version.rb
+ - rly.gemspec
+ - spec/lex/lexer_spec.rb
+ - spec/spec_helper.rb
+ homepage: ''
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.21
+ signing_key:
+ specification_version: 3
+ summary: A simple ruby implementation of lex and yacc, based on Python's ply
+ test_files:
+ - spec/lex/lexer_spec.rb
+ - spec/spec_helper.rb
+ has_rdoc: