RubyGems - rltk - Versions diffs - 1.1.0 → 1.2.0 - Mend

rltk 1.1.0 → 1.2.0

Files changed (9) hide show

data/README CHANGED Viewed

@@ -132,6 +132,10 @@ A RLTK::Lexer may be told to select either the first substring that is found to
 		...
 	end
+=== Match Data
+Because it isn't RLTK's job to tell you how to write lexers and parsers, the MatchData object from a pattern match is available inside the Lexer::Environment object via the +match+ accessor.
 == Parsers
 To create a parser using RLTK simply subclass RLTK::Parser, define the productions of the grammar you wish to parse, and call +finalize+.  During finalization RLTK will build an LALR(1) parsing table, which may contain conflicts that can't be resolved with LALR(1) lookahead sets or precedence/associativity information.  Traditionally, when parser generators such as *YACC* encounter conflicts during parsing table generation they will resolve shift/reduce conflicts in favor of shifts and reduce/reduce conflicts in favor of the production that was defined first.  This means that the generated parsers can't handle ambiguous grammars.
@@ -222,7 +226,7 @@ The parsing environment is the context in which the proc objects associated with
 === Instantiating Parsers
-In addition to using the RLTK::Parser::parse class method you may also instantiate parser objects.  The only difference then is that the parsing environment used between subsequent calls to +object.lex+ is the same object, and therefor allows you to keep persistent state.
+In addition to using the RLTK::Parser::parse class method you may also instantiate parser objects.  The only difference then is that the parsing environment used between subsequent calls to +object.parse+ is the same object, and therefore allows you to keep persistent state.
 === Finalization Options
@@ -259,7 +263,7 @@ Calls to RLTK::Parser::ParserCore.parse may raise one of four exceptions:
 === Error Productions
-<b>Warning, this is the lest tested feature of RLTK.  If you encounter any problems while using it, please let me know so I can fix any bugs as soon as possible</b>
+<b>Warning: this is the least tested feature of RLTK.  If you encounter any problems while using it, please let me know so I can fix any bugs as soon as possible.</b>
 When an RLTK parser encounters a token for which there are no more valid tokens (and it is on the last parse stack / possible parse-tree path) it will enter error handling mode.  In this mode the parser pops states and input off of the parse stack (the parser is a pushdown automaton after all) until it finds a state that has a shift action for the +ERROR+ terminal.  A dummy +ERROR+ terminal is then placed onto the parse stack and the shift action is taken.  This error token will have the position information of the token that caused the parser to enter error handling mode.

data/Rakefile CHANGED Viewed

@@ -8,60 +8,40 @@
 ##############
 require 'rake/testtask'
-require 'rubygems/package_task'
-require 'rdoc/task'
+require 'bundler'
-RDoc::Task.new do |t|
-	t.title		= 'The Ruby Language Toolkit'
-	t.main		= 'README'
-	t.rdoc_dir	= 'doc'
+begin
+	require 'rdoc/task'
-	t.rdoc_files.include('README', 'lib/*.rb', 'lib/rltk/*.rb', 'lib/rltk/**/*.rb')
-end
-#Rake::TestTask.new do |t|
-#	t.libs << 'test'
-#	t.test_files = FileList['test/ts_rltk.rb']
-#end
+	RDoc::Task.new do |t|
+		t.title		= 'The Ruby Language Toolkit'
+		t.main		= 'README'
+		t.rdoc_dir	= 'doc'
+		t.rdoc_files.include('README', 'lib/*.rb', 'lib/rltk/*.rb', 'lib/rltk/**/*.rb')
+	end
-# This workaround is here because the Rake::DSL module gets auto-loaded into
-# the Object class, and therefor any object that defines conflicting methods
-# get over-ridden.
-task :test do
-	exec "ruby -C \"test\" -e \"require 'ts_rltk.rb'\""
+rescue LoadError
+	warn 'RDoc is not installed.'
 end
-def spec
-	Gem::Specification.new do |s|
-		s.platform = Gem::Platform::RUBY
-		s.name		= 'rltk'
-		s.version		= '1.1.0'
-		s.summary		= 'The Ruby Language Toolkit'
-		s.description	=
-			'The Ruby Language Toolkit provides classes for creating' +
-			'context-free grammars, lexers, parsers, and abstract syntax trees.'
-		s.files = [
-				'LICENSE',
-				'AUTHORS',
-				'README',
-				'Rakefile',
-				] +
-				Dir.glob('lib/rltk/**/*.rb')
-		s.require_path	= 'lib'
-		s.author		= 'Chris Wailes'
-		s.email		= 'chris.wailes@gmail.com'
-		s.homepage	= 'http://github.com/chriswailes/RLTK'
-		s.license		= 'University of Illinois/NCSA Open Source License'
-		s.test_files	= Dir.glob('test/tc_*.rb')
+begin
+	require 'rcov/rcovtask'
+	Rcov::RcovTask.new do |t|
+		t.libs		<< 'test'
+		t.rcov_opts	<< '--exclude gems,ruby'
+		t.test_files	= FileList['test/tc_*.rb']
 	end
+rescue LoadError
+	warn 'Rcov not installed.'
 end
-Gem::PackageTask.new(spec) do |t|
-	t.need_tar = true
+Rake::TestTask.new do |t|
+	t.libs << 'test'
+	t.loader = :testrb
+	t.test_files = FileList['test/ts_rltk.rb']
 end
+Bundler::GemHelper.install_tasks

data/lib/rltk/ast.rb CHANGED Viewed

@@ -90,26 +90,50 @@ module RLTK # :nodoc:
 					end
 					if type.is_a?(Class)
-						define_method((name.to_s + '=').to_sym) do |value|
-							if value.is_a?(type) or value == nil
-								self.instance_variable_set(ivar_name, value)
+						if set_parent
+							define_method((name.to_s + '=').to_sym) do |value|
+								if value.is_a?(type) or value == nil
+									self.instance_variable_set(ivar_name, value)
-								value.parent = self if value and set_parent
-							else
-								raise TypeMismatch.new(type, value.class)
+									value.parent = self if value
+								else
+									raise TypeMismatch.new(type, value.class)
+								end
+							end
+						else
+							define_method((name.to_s + '=').to_sym) do |value|
+								if value.is_a?(type) or value == nil
+									self.instance_variable_set(ivar_name, value)
+								else
+									raise TypeMismatch.new(type, value.class)
+								end
 							end
 						end
 					else
 						type = type.first
-						define_method((name.to_s + '=').to_sym) do |value|
-							if value.inject(true) { |m, o| m and o.is_a?(type) }
-								self.instance_variable_set(ivar_name, value)
+						if set_parent
+							define_method((name.to_s + '=').to_sym) do |value|
+								if value.inject(true) { |m, o| m and o.is_a?(type) }
+									self.instance_variable_set(ivar_name, value)
-								value.each { |c| c.parent = self } if set_parent
-							else
-								raise TypeMismatch.new(type, value.class)
+									value.each { |c| c.parent = self }
+								else
+									raise TypeMismatch.new(type, value.class)
+								end
+							end
+						else
+							define_method((name.to_s + '=').to_sym) do |value|
+								if value.inject(true) { |m, o| m and o.is_a?(type) }
+									self.instance_variable_set(ivar_name, value)
+								else
+									raise TypeMismatch.new(type, value.class)
+								end
 							end
 						end

data/lib/rltk/cfg.rb CHANGED Viewed

@@ -1,11 +1,11 @@
+# encoding: utf-8
 # Author:		Chris Wailes <chris.wailes@gmail.com>
 # Project: 	Ruby Language Toolkit
 # Date:		2011/03/24
 # Description:	This file contains a class representing a context-free
 #			grammar.
-# encoding: utf-8
 ############
 # Requires #
 ############

data/lib/rltk/lexer.rb CHANGED Viewed

@@ -113,106 +113,108 @@ module RLTK # :nodoc:
 			# return the array of tokens generated by the lexer with a token
 			# of type EOS (End of Stream) appended to the end.
 			def lex(string, env, file_name = nil)
-					# Offset from start of stream.
-					stream_offset = 0
+				# Offset from start of stream.
+				stream_offset = 0
+				# Offset from the start of the line.
+				line_offset = 0
+				line_number = 1
-					# Offset from the start of the line.
-					line_offset = 0
-					line_number = 1
-					# Empty token list.
-					tokens = Array.new
-					# The scanner.
-					scanner = StringScanner.new(string)
+				# Empty token list.
+				tokens = Array.new
+				# The scanner.
+				scanner = StringScanner.new(string)
+				# Start scanning the input string.
+				until scanner.eos?
+					match = nil
-					# Start scanning the input string.
-					until scanner.eos?
-						match = nil
-						# If the match_type is set to :longest all of the
-						# rules for the current state need to be scanned
-						# and the longest match returned.  If the
-						# match_type is :first, we only need to scan until
-						# we find a match.
-						@rules[env.state].each do |rule|
-							if (rule.flags - env.flags).empty?
-								if txt = scanner.check(rule.pattern)
-									if not match or match.first.length < txt.length
-										match = [txt, rule]
-										break if @match_type == :first
-									end
+					# If the match_type is set to :longest all of the
+					# rules for the current state need to be scanned
+					# and the longest match returned.  If the
+					# match_type is :first, we only need to scan until
+					# we find a match.
+					@rules[env.state].each do |rule|
+						if (rule.flags - env.flags).empty?
+							if txt = scanner.check(rule.pattern)
+								if not match or match.first.length < txt.length
+									match = [txt, rule]
+									break if @match_type == :first
 								end
 							end
 						end
+					end
+					if match
+						rule = match.last
-						if match
-							rule = match.last
-							txt = scanner.scan(rule.pattern)
-							type, value = env.instance_exec(txt, &rule.action)
-							if type
-								pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
-								tokens << Token.new(type, value, pos)
-							end
-							# Advance our stat counters.
-							stream_offset += txt.length
-							if (newlines = txt.count("\n")) > 0
-								line_number += newlines
-								line_offset  = 0
-							else
-								line_offset += txt.length()
-							end
+						txt = scanner.scan(rule.pattern)
+						type, value = env.rule_exec(rule.pattern.match(txt), txt, &rule.action)
+						if type
+							pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
+							tokens << Token.new(type, value, pos)
+						end
+						# Advance our stat counters.
+						stream_offset += txt.length
+						if (newlines = txt.count("\n")) > 0
+							line_number += newlines
+							line_offset  = 0
 						else
-							error = LexingError.new(stream_offset, line_number, line_offset, scanner.post_match)
-							raise(error, 'Unable to match string with any of the given rules')
+							line_offset += txt.length()
 						end
+					else
+						error = LexingError.new(stream_offset, line_number, line_offset, scanner.post_match)
+						raise(error, 'Unable to match string with any of the given rules')
 					end
-					return tokens << Token.new(:EOS)
-				end
-				# A wrapper function that calls ParserCore.lex on the
-				# contents of a file.
-				def lex_file(file_name, evn)
-					File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
 				end
-				# Used to tell a lexer to use the first match found instead
-				# of the longest match found.
-				def match_first
-					@match_type = :first
-				end
+				return tokens << Token.new(:EOS)
+			end
+			# A wrapper function that calls LexerCore.lex on the
+			# contents of a file.
+			def lex_file(file_name, env)
+				File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
+			end
+			# Used to tell a lexer to use the first match found instead
+			# of the longest match found.
+			def match_first
+				@match_type = :first
+			end
+			# This method is used to define a new lexing rule.  The
+			# first argument is the regular expression used to match
+			# substrings of the input.  The second argument is the state
+			# to which the rule belongs.  Flags that need to be set for
+			# the rule to be considered are specified by the third
+			# argument.  The last argument is a block that returns a
+			# type and value to be used in constructing a Token. If no
+			# block is specified the matched substring will be
+			# discarded and lexing will continue.
+			def rule(pattern, state = :default, flags = [], &action)
+				# If no action is given we will set it to an empty
+				# action.
+				action ||= Proc.new() {}
-				# This method is used to define a new lexing rule.  The
-				# first argument is the regular expression used to match
-				# substrings of the input.  The second argument is the state
-				# to which the rule belongs.  Flags that need to be set for
-				# the rule to be considered are specified by the third
-				# argument.  The last argument is a block that returns a
-				# type and value to be used in constructing a Token. If no
-				# block is specified the matched substring will be
-				# discarded and lexing will continue.
-				def rule(pattern, state = :default, flags = [], &action)
-					# If no action is given we will set it to an empty
-					# action.
-					action ||= Proc.new() {}
-					r = Rule.new(pattern, action, state, flags)
-					if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
-				end
+				pattern = Regexp.new(pattern) if pattern.is_a?(String)
-				alias :r :rule
+				r = Rule.new(pattern, action, state, flags)
-				# Changes the starting state of the lexer.
-				def start(state)
-					@start_state = state
-				end
+				if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
+			end
+			alias :r :rule
+			# Changes the starting state of the lexer.
+			def start(state)
+				@start_state = state
+			end
 		end
 		# All actions passed to LexerCore.rule are evaluated inside an
@@ -224,12 +226,24 @@ module RLTK # :nodoc:
 			# The flags currently set in this environment.
 			attr_reader :flags
+			# The Match object generated by a rule's regular expression.
+			attr_accessor :match
 			# Instantiates a new Environment object.
-			def initialize(start_state)
+			def initialize(start_state, match = nil)
 				@state	= [start_state]
+				@match	= match
 				@flags	= Array.new
 			end
+			# This function will instance_exec a block for a rule after
+			# setting the match value.
+			def rule_exec(match, txt, &block)
+				self.match = match
+				self.instance_exec(txt, &block)
+			end
 			# Pops a state from the state stack.
 			def pop_state
 				@state.pop

data/lib/rltk/parser.rb CHANGED Viewed

@@ -1010,7 +1010,7 @@ module RLTK # :nodoc:
 							# Build the lookahead set.
 							@states.each do |state1|
 								if self.check_reachability(state1, state0, production.rhs)
-									lookahead |= (var = self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym))
+									lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
 								end
 							end

data/lib/rltk/version.rb ADDED Viewed

@@ -0,0 +1,8 @@
+# Author:		Chris Wailes <chris.wailes@gmail.com>
+# Project: 	Ruby Language Toolkit
+# Date:		2012/03/08
+# Description:	This file specifies the version number of RLTK.
+module RLTK
+	VERSION = "1.2.0"
+end

data/test/tc_lexer.rb CHANGED Viewed

@@ -70,6 +70,10 @@ class StateLexer < RLTK::Lexer
 	rule(/./,    :comment)
 end
+class MatchDataLexer < RLTK::Lexer
+	rule(/a(b*)(c+)/) { [:FOO, match[1,2]] }
+end
 class LexerTester < Test::Unit::TestCase
 	def test_calc
 		expected =
@@ -200,6 +204,13 @@ class LexerTester < Test::Unit::TestCase
 		assert_equal(expected, actual)
 	end
+	def test_match_data
+		expected	= [RLTK::Token.new(:FOO, ['', 'ccc']), RLTK::Token.new(:EOS)]
+		actual	= MatchDataLexer.lex('accc')
+		assert_equal(expected, actual)
+	end
 	def test_state
 		expected =
 			[

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rltk
 version: !ruby/object:Gem::Version
-  hash: 19
+  hash: 31
   prerelease:
   segments:
   - 1
-  - 1
+  - 2
   - 0
-  version: 1.1.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Chris Wailes
@@ -15,11 +15,66 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-27 00:00:00 Z
-dependencies: []
-description: The Ruby Language Toolkit provides classes for creatingcontext-free grammars, lexers, parsers, and abstract syntax trees.
-email: chris.wailes@gmail.com
+date: 2012-03-08 00:00:00 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: builder
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: rake
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id002
+- !ruby/object:Gem::Dependency
+  name: rcov
+  prerelease: false
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id003
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  prerelease: false
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id004
+description: The Ruby Language Toolkit provides classes for creating context-free grammars, lexers, parsers, and abstract syntax trees.
+email: chris.wailes+rltk@gmail.com
 executables: []
 extensions: []
@@ -41,6 +96,7 @@ files:
 - lib/rltk/parsers/postfix_calc.rb
 - lib/rltk/parsers/infix_calc.rb
 - lib/rltk/parsers/prefix_calc.rb
+- lib/rltk/version.rb
 - test/tc_ast.rb
 - test/tc_token.rb
 - test/tc_cfg.rb
@@ -75,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.10
+rubygems_version: 1.8.11
 signing_key:
 specification_version: 3
 summary: The Ruby Language Toolkit