RubyGems - antelope - Versions diffs - 0.2.4 → 0.3.0 - Mend

antelope 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

checksums.yaml +4 -4
data/.rubocop.yml +11 -0
data/bin/antelope +3 -3
data/examples/{example.err → example.ace.err} +9 -9
data/examples/{example.inf → example.ace.inf} +57 -9
data/examples/example.ate +70 -0
data/examples/example.ate.err +192 -0
data/examples/example.ate.inf +432 -0
data/lib/antelope/ace/compiler.rb +4 -4
data/lib/antelope/ace/errors.rb +0 -18
data/lib/antelope/ace.rb +6 -12
data/lib/antelope/cli.rb +1 -1
data/lib/antelope/dsl/compiler.rb +117 -0
data/lib/antelope/dsl/contexts/base.rb +29 -0
data/lib/antelope/dsl/contexts/main.rb +63 -0
data/lib/antelope/dsl/contexts/match.rb +24 -0
data/lib/antelope/dsl/contexts/precedence.rb +20 -0
data/lib/antelope/dsl/contexts/production.rb +24 -0
data/lib/antelope/dsl/contexts/terminal.rb +28 -0
data/lib/antelope/dsl/contexts.rb +16 -0
data/lib/antelope/dsl.rb +9 -0
data/lib/antelope/errors.rb +18 -1
data/lib/antelope/generation/constructor/first.rb +10 -12
data/lib/antelope/generation/constructor/follow.rb +6 -6
data/lib/antelope/generation/constructor/nullable.rb +6 -6
data/lib/antelope/generation/constructor.rb +4 -4
data/lib/antelope/generation/recognizer/rule.rb +17 -17
data/lib/antelope/generation/recognizer/state.rb +9 -10
data/lib/antelope/generation/recognizer.rb +8 -11
data/lib/antelope/generation/tableizer.rb +2 -2
data/lib/antelope/generator/base.rb +7 -7
data/lib/antelope/generator/ruby.rb +1 -1
data/lib/antelope/grammar/generation.rb +77 -0
data/lib/antelope/grammar/loading.rb +84 -0
data/lib/antelope/{ace → grammar}/precedence.rb +2 -4
data/lib/antelope/grammar/precedences.rb +64 -0
data/lib/antelope/{ace → grammar}/production.rb +11 -12
data/lib/antelope/grammar/productions.rb +154 -0
data/lib/antelope/grammar/symbols.rb +64 -0
data/lib/antelope/{ace → grammar}/token/epsilon.rb +1 -2
data/lib/antelope/{ace → grammar}/token/error.rb +1 -3
data/lib/antelope/{ace → grammar}/token/nonterminal.rb +1 -3
data/lib/antelope/{ace → grammar}/token/terminal.rb +1 -3
data/lib/antelope/{ace → grammar}/token.rb +12 -15
data/lib/antelope/grammar.rb +68 -0
data/lib/antelope/version.rb +1 -1
data/lib/antelope.rb +12 -6
data/spec/antelope/ace/compiler_spec.rb +6 -6
data/spec/antelope/ace/scanner_spec.rb +7 -7
data/spec/antelope/generation/constructor_spec.rb +131 -0
data/spec/support/grammar_helper.rb +2 -3
metadata +32 -19
data/lib/antelope/ace/grammar/generation.rb +0 -80
data/lib/antelope/ace/grammar/loading.rb +0 -53
data/lib/antelope/ace/grammar/precedences.rb +0 -68
data/lib/antelope/ace/grammar/productions.rb +0 -156
data/lib/antelope/ace/grammar/symbols.rb +0 -66
data/lib/antelope/ace/grammar.rb +0 -69
data/spec/antelope/constructor_spec.rb +0 -133

data/lib/antelope/grammar/generation.rb ADDED Viewed

@@ -0,0 +1,77 @@
+# encoding: utf-8
+module Antelope
+  class Grammar
+    # The default modifiers for generation, as `[name, class]`
+    # pairs.  {Generation#generate} instantiates each class with the
+    # grammar, calls it, and exposes the instance to the generators
+    # under the given name.  It's not really recommended to (heh)
+    # modify this; however, adding your own modifier is always
+    # acceptable.
+    #
+    # NOTE(review): `Generation` here is resolved before
+    # `Grammar::Generation` is opened just below, so it presumably
+    # refers to the enclosing `Antelope::Generation` namespace --
+    # confirm the load order guarantees this.
+    DEFAULT_MODIFIERS = [
+      [:recognizer,  Generation::Recognizer ],
+      [:constructor, Generation::Constructor],
+      [:tableizer,   Generation::Tableizer  ]
+    ].freeze
+    # Handles the generation of output for the grammar.
+    module Generation
+      # Generates the output.  First, it runs through every given
+      # modifier, and instintates it.  It then calls every modifier,
+      # turns it into a hash, and passes that hash to each of the
+      # given generators.
+      #
+      # @param options [Hash] options.
+      # @param generators [Array<Generator>] a list of generators
+      #   to use in generation.
+      # @param modifiers [Array<Array<(Symbol, #call)>>] a list of
+      #   modifiers to apply to the grammar.
+      # @return [void]
+      def generate(options    = {},
+                   generators = :guess,
+                   modifiers  = DEFAULT_MODIFIERS)
+        pp self
+        mods = modifiers.map(&:last)
+               .map  { |x| x.new(self) }
+        mods.each do |mod|
+          puts "Running mod #{mod.class}..." if options[:verbose]
+          mod.call
+        end
+        hash = Hash[modifiers.map(&:first).zip(mods)]
+        # This is when we'd generate
+        find_generators(generators, options).each do |gen|
+          puts "Running generator #{gen}..." if options[:verbose]
+          gen.new(self, hash).generate
+        end
+      end
+      private
+      # Resolves the generator classes to run.  An explicit list
+      # (anything other than `:guess`) is returned untouched.  For
+      # `:guess`, the type is read from the given options (symbol
+      # key first, then string key) and, failing that, from the
+      # compiler's options; the result is {Generator::Output}
+      # followed by the generator registered under that type.
+      #
+      # @raise [NoTypeError] if a `KeyError` is raised while looking
+      #   up the type or the generator registered for it.
+      # @param generators [Symbol, Array<Generator>]
+      # @param options [Hash]
+      # @return [Array<Generator>]
+      def find_generators(generators, options)
+        return generators if generators != :guess
+        found = [Generator::Output]
+        # command line precedence...
+        type = options[:type] || options['type'] ||
+               compiler.options.fetch(:type)
+        found.push(Generator.generators.fetch(type.to_s))
+      rescue KeyError
+        raise NoTypeError, "Undefined type #{type}"
+      end
+    end
+  end
+end

data/lib/antelope/grammar/loading.rb ADDED Viewed

@@ -0,0 +1,84 @@
+# encoding: utf-8
+module Antelope
+  class Grammar
+    # Handles loading to and from files and strings.
+    module Loading
+      # Defines class methods on the grammar.
+      module ClassMethods
+        # Loads a grammar from a file.  Assumes the output
+        # directory and name from the file name.
+        #
+        # @param file_name [String] the file name.
+        # @return [Grammar]
+        # @see #from_string
+        def from_file(file_name)
+          ext = File.extname(file_name)
+          case ext
+          when ".rb", ".ate"
+            from_dsl_file(file_name)
+          when ".ace"
+            from_ace_file(file_name)
+          else
+            raise ArgumentError, "Unexpected file extension #{ext},"\
+              " expected one of .rb, .ate, or .ace"
+          end
+        end
+        def from_dsl_file(file_name)
+          body   = File.read(file_name)
+          output = File.dirname(file_name)
+          from_dsl_string(file_name, output, body)
+        end
+        def from_ace_file(file_name)
+          body   = File.read(file_name)
+          output = File.dirname(file_name)
+          name   = File.basename(file_name)
+          from_ace_string(name, output, body)
+        end
+        # Loads a grammar from a string.  First runs the scanner and
+        # compiler over the string, and then instantiates a new
+        # Grammar from the resultant.
+        #
+        # @param file [String] the path of the grammar.  This is
+        #   used for eval.
+        # @param output [String] the output directory.
+        # @param string [String] the grammar body.
+        # @return [Grammar]
+        # @see DSL::Compiler
+        def from_dsl_string(file, output, string)
+          eval(string, TOPLEVEL_BINDING, file, 0)
+          grammar = Antelope.grammar
+          compiler = DSL::Compiler.compile(grammar[1], &grammar[2])
+          new(File.basename(file), output, compiler)
+        end
+        # Loads a grammar from a string.  First runs the scanner and
+        # compiler over the string, and then instantiates a new
+        # Grammar from the resultant.
+        #
+        # @param name [String] the name of the grammar.
+        # @param output [String] the output directory.
+        # @param string [String] the grammar body.
+        # @return [Grammar]
+        # @see Ace::Scanner
+        # @see Ace::Compiler
+        def from_ace_string(name, output, string)
+          scanner  = Ace::Scanner.scan(string, name)
+          compiler = Ace::Compiler.compile(scanner)
+          new(name, output, compiler)
+        end
+      end
+      # Extends the grammar with the class methods.
+      #
+      # @param receiver [Grammar]
+      # @see ClassMethods
+      def self.included(receiver)
+        receiver.extend ClassMethods
+      end
+    end
+  end
+end

data/lib/antelope/{ace → grammar}/precedence.rb RENAMED Viewed

@@ -1,12 +1,10 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     # Defines a precedence.  A precedence has a type, tokens, and a
     # level.
-    class Precedence < Struct.new(:type, :tokens, :level)
+    Precedence = Struct.new(:type, :tokens, :level) do
       # @!attribute [rw] type
       #   The type of precedence level.  This should be one of
       #   `:left`, `:right`, or `:nonassoc`.

data/lib/antelope/grammar/precedences.rb ADDED Viewed

@@ -0,0 +1,64 @@
+# encoding: utf-8
+require "set"
+module Antelope
+  class Grammar
+    # Manages precedence for tokens.
+    module Precedences
+      # Accesses the generated precedence list.  Lazily generates
+      # the precedence rules on the go, and then caches it.
+      #
+      # @return [Array<Ace::Precedence>]
+      def precedence
+        @_precedence ||= generate_precedence
+      end
+      # Finds a precedence rule for a given token.  If no direct
+      # rule is defined for that token, it will check for a rule
+      # defined for the special symbol, `:_`.  By default, there
+      # is always a rule defined for `:_`.
+      #
+      # @param token [Ace::Token, Symbol]
+      # @return [Ace::Precedence]
+      def precedence_for(token)
+        token = token.name if token.is_a?(Token)
+        prec = precedence.
+          detect { |pr| pr.tokens.include?(token) } ||
+        precedence.
+          detect { |pr| pr.tokens.include?(:_) }
+        prec
+      end
+      private
+      # Generates the precedence rules.  Loops through the compiler
+      # given precedence settings, and then adds two default
+      # precedence rules; one for `:$` (level 0, nonassoc), and one
+      # for `:_` (level 1, nonassoc).
+      #
+      # @return [Array<Ace::Precedence>]
+      def generate_precedence
+        size = @compiler.options[:prec].size + 1
+        index = 0
+        precedence = []
+        while index < size - 1
+          prec = @compiler.options[:prec][index]
+          precedence <<
+            Precedence.new(prec[0], prec[1..-1].to_set,
+            size - index)
+          index += 1
+        end
+        precedence <<
+          Precedence.new(:nonassoc, [:$end].to_set, 0) <<
+          Precedence.new(:nonassoc, [:_].to_set, 1)
+        precedence.sort_by(&:level).reverse
+      end
+    end
+  end
+end

data/lib/antelope/{ace → grammar}/production.rb RENAMED Viewed

@@ -1,10 +1,9 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     # Defines a production.
-    class Production < Struct.new(:label, :items, :block, :prec, :id)
+    Production = Struct.new(:label, :items, :block, :prec, :id) do
       # @!attribute [rw] label
       #   The label (or left-hand side) of the production.  This
       #   should be a nonterminal.
@@ -35,11 +34,11 @@ module Antelope
       #
       # @param hash [Hash<(Symbol, Object)>]
       def self.from_hash(hash)
-        new(hash[:label] || hash["label"],
-            hash[:items] || hash["items"],
-            hash[:block] || hash["block"],
-            hash[:prec]  || hash["prec"],
-            hash[:id]    || hash["id"])
+        new(hash[:label] || hash['label'],
+            hash[:items] || hash['items'],
+            hash[:block] || hash['block'],
+            hash[:prec]  || hash['prec'],
+            hash[:id]    || hash['id'])
       end
       # Create a new version of the production with duplicated values.
@@ -47,10 +46,10 @@ module Antelope
       # @return [Production]
       def clone
         Production.new(label.dup,
-            items.map(&:dup),
-            block.dup,
-            prec.dup,
-            id)
+          items.map(&:dup),
+          block.dup,
+          prec.dup,
+          id)
       end
     end
   end

data/lib/antelope/grammar/productions.rb ADDED Viewed

@@ -0,0 +1,154 @@
+# encoding: utf-8
+module Antelope
+  class Grammar
+    # Manages the productions of the grammar.
+    module Productions
+      # Returns a hash of all of the productions.  The result is
+      # cached.
+      #
+      # @return [Hash<(Symbol, Array<Production>)>]
+      def productions
+        @_productions || generate_productions
+      end
+      # Returns all productions for all nonterminals, sorted by id.
+      #
+      # @return [Array<Production>]
+      def all_productions
+        productions.values.flatten.sort_by(&:id)
+      end
+      # Finds a token based on its corresponding symbol.  First
+      # checks the productions, to see if it's a nonterminal; then,
+      # tries to find it in the terminals; otherwise, if the symbol
+      # is `error`, it returns a {Token::Error}; if the symbol is
+      # `nothing` or `ε`, it returns a {Token::Epsilon}; if it's
+      # none of those, it raises an {UndefinedTokenError}.
+      #
+      # @raise [UndefinedTokenError] if the token doesn't exist.
+      # @param value [String, Symbol, #intern] the token's symbol to
+      #   check.
+      # @return [Token]
+      def find_token(value)
+        value = value.intern
+        if productions.key?(value)
+          typed_nonterminals.find { |term| term.name == value } ||
+            Token::Nonterminal.new(value)
+        elsif terminal = terminals.
+            find { |term| term.name == value }
+          terminal
+        elsif value == :$error || value == :error
+          Token::Error.new
+        elsif [:nothing, :ε, :"%empty"].include?(value)
+          Token::Epsilon.new
+        else
+          raise UndefinedTokenError, "Could not find a token " \
+            "named #{value.inspect}"
+        end
+      end
+      private
+      # Actually generates the productions.  Uses the rules from the
+      # compiler to construct the productions.  Makes two loops over
+      # the compiler's rules; the first to tell the grammar that the
+      # nonterminal does exist, and the second to actually construct
+      # the productions.  The first loop is for {#find_token},
+      # because otherwise it wouldn't be able to return a
+      # nonterminal properly.
+      #
+      # @return [Hash<(Symbol, Array<Production>)>]
+      def generate_productions
+        @_productions = {}
+        index = 0
+        rules = @compiler.rules.each do |rule|
+          productions[rule[:label]] = []
+        end
+        while index < rules.size
+          rule = rules[index]
+          productions[rule[:label]] <<
+            generate_production_for(rule, index)
+          index += 1
+        end
+        productions[:$start] = [default_production]
+        productions
+      end
+      # Generates a production for a given compiler rule.  Converts
+      # the tokens in the set to their {Token} counterparts,
+      # and then sets the precedence for the production.  If the
+      # precedence declaration from the compiler rule is empty,
+      # then it'll use the last terminal from the set to check for
+      # precedence; otherwise, it'll use the precedence declaration.
+      # This is to make sure that every production has a precedence
+      # declaration.
+      #
+      # @param rule [Hash] the compiler's rule.
+      # @param id [Numeric] the id for the production.
+      # @return [Production]
+      def generate_production_for(rule, id)
+        left  = Token::Nonterminal.new(rule[:label])
+        items = rule[:set].map { |_| find_token(_[0]) }
+        prec  = if rule[:prec].empty?
+          items.select(&:terminal?).first
+        else
+          rule[:prec].intern
+        end
+        prec  = precedence_for(prec)
+        left.type = type_for(rule[:label])
+        left.id = rule[:label_id]
+        rule[:set].each_with_index do |tok, i|
+          items[i] = items[i].dup
+          items[i].id = tok[1]
+        end
+        items.delete_if(&:epsilon?)
+        Production.new(left, items, rule[:block], prec, id + 1)
+      end
+      # Returns the defined type for the given token name.
+      # Uses the `%type` directive to infer the corresponding types.
+      #
+      # @param token [Symbol] the token to check for
+      #   types.
+      def type_for(token)
+        token = find_token(token) unless token.is_a?(Token)
+        case token
+        when Token::Nonterminal
+          token.type
+        when Token::Terminal
+          token.type
+        when Token::Epsilon
+          ""
+        when Token::Error
+          ""
+        end
+      end
+      # Creates the default production for the grammar.  The left
+      # hand side of the production is the `:$start` symbol, with
+      # the right hand side being the first rule's left-hand side
+      # and the terminal `$`.  This production is automagically
+      # given the last precedence, and an id of 0.
+      #
+      # @return [Production]
+      def default_production
+        Production.new(Token::Nonterminal.new(:$start), [
+            Token::Nonterminal.new(@compiler.rules.first[:label]),
+            Token::Terminal.new(:$end)
+          ], "", precedence.last, 0)
+      end
+    end
+  end
+end

data/lib/antelope/grammar/symbols.rb ADDED Viewed

@@ -0,0 +1,64 @@
+# encoding: utf-8
+module Antelope
+  class Grammar
+    # Manages a list of the symbols in the grammar.
+    module Symbols
+      # A list of all terminals in the grammar.  Checks the compiler
+      # options for terminals, and then returns an array of
+      # terminals.  Caches the result.
+      #
+      # @return [Array<Token::Terminal>]
+      def terminals
+        @_terminals ||= begin
+          @compiler.options.fetch(:terminals) { [] }.map do |v|
+            Token::Terminal.new(*v)
+          end
+        end
+      end
+      # A list of all nonterminals in the grammar.
+      #
+      # @return [Array<Symbol>]
+      # @see #productions
+      def nonterminals
+        @_nonterminals ||= productions.keys
+      end
+      # A list of all nonterminals, with types.
+      #
+      # @return [Array<Token::Nonterminal>>]
+      def typed_nonterminals
+        @_typed_nonterminals ||= begin
+          typed = []
+          compiler.options[:nonterminals].each do |data|
+            data[1].each do |nonterm|
+              typed << Token::Nonterminal.new(nonterm, data[0])
+            end
+          end
+          typed
+        end
+      end
+      # A list of all symbols in the grammar; includes both
+      # terminals and nonterminals.
+      #
+      # @return [Array<Token::Terminal, Symbol>]
+      # @see #terminals
+      # @see #nonterminals
+      def symbols
+        @_symbols ||= terminals + nonterminals
+      end
+      # Checks to see if the grammar uses the `error` terminal
+      # anywhere.
+      #
+      # @return [Boolean]
+      def contains_error_token?
+        all_productions.any? { |_| _.items.any?(&:error?) }
+      end
+    end
+  end
+end

data/lib/antelope/{ace → grammar}/token/epsilon.rb RENAMED Viewed

@@ -1,9 +1,8 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     class Token
       # Defines an epsilon token.  An epsilon token represents
       # nothing.  This is used to say that a nonterminal can
       # reduce to nothing.

data/lib/antelope/{ace → grammar}/token/error.rb RENAMED Viewed

@@ -1,15 +1,13 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     class Token
       # Defines an error token.  This may be used internally by the
       # parser when it enters panic mode; any tokens following this
       # are the synchronisation tokens.  This is considered a terminal
       # for the purposes of rule definitions.
       class Error < Terminal
         # Initialize the error token.  Technically takes no arguments.
         # Sets the name to be `:$error`.
         def initialize(*)

data/lib/antelope/{ace → grammar}/token/nonterminal.rb RENAMED Viewed

@@ -1,12 +1,10 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     class Token
       # Defines a nonterminal token.
       class Nonterminal < Token
         # (see Token#nonterminal?)
         def nonterminal?
           true

data/lib/antelope/{ace → grammar}/token/terminal.rb RENAMED Viewed

@@ -1,12 +1,10 @@
 # encoding: utf-8
 module Antelope
-  module Ace
+  class Grammar
     class Token
       # Defines a terminal token.
       class Terminal < Token
         # (see Token#terminal?)
         def terminal?
           true

data/lib/antelope/{ace → grammar}/token.rb RENAMED Viewed

@@ -1,21 +1,18 @@
 # encoding: utf-8
-require "antelope/ace/token/nonterminal"
-require "antelope/ace/token/terminal"
-require "antelope/ace/token/epsilon"
-require "antelope/ace/token/error"
+require 'antelope/grammar/token/nonterminal'
+require 'antelope/grammar/token/terminal'
+require 'antelope/grammar/token/epsilon'
+require 'antelope/grammar/token/error'
 module Antelope
-  module Ace
+  class Grammar
     # Defines a token type for productions/rules.
     #
     # @abstract This class should be inherited to define a real token.
     #   A base class does not match any token; however, any token can
     #   match the base class.
     class Token
       # The name of the token.
       #
       # @return [Symbol]
@@ -138,16 +135,16 @@ module Antelope
       # @see #name
       def to_s
         buf = if @value
-          @value.inspect
-        else
-          @name.to_s
-        end
+                @value.inspect
+              else
+                @name.to_s
+              end
-        if from or to
-          buf << "("
+        if from || to
+          buf << '('
           buf << "#{from.id}" if from
           buf << ":#{to.id}"  if to
-          buf << ")"
+          buf << ')'
         end
         buf

data/lib/antelope/grammar.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# encoding: utf-8
+require 'hashie'
+require 'pathname'
+require 'antelope/grammar/symbols'
+require 'antelope/grammar/productions'
+require 'antelope/grammar/production'
+require 'antelope/grammar/precedences'
+require 'antelope/grammar/precedence'
+require 'antelope/grammar/loading'
+require 'antelope/grammar/generation'
+require 'antelope/grammar/token'
+module Antelope
+  # Defines a grammar from an Ace file.  This handles setting up
+  # productions, loading from files, symbols, precedence, and
+  # generation.
+  class Grammar
+    include Symbols
+    include Productions
+    include Precedences
+    include Loading
+    include Grammar::Generation
+    # Used by a generation class; this is all the generated states
+    # of the grammar.
+    #
+    # @return [Set<Generation::Recognizer::State>]
+    # @see Generation::Recognizer
+    attr_accessor :states
+    # The name of the grammar.  This is normally assumed from a file
+    # name.
+    #
+    # @return [String]
+    attr_accessor :name
+    # The output directory for the grammar.  This is normally the
+    # same directory as the Ace file.
+    #
+    # @return [Pathname]
+    attr_accessor :output
+    # The compiler for the Ace file.
+    #
+    # @return [Compiler]
+    attr_reader :compiler
+    # Initialize.
+    #
+    # @param name [String]
+    # @param output [String] the output directory.  Automagically
+    #   turned into a Pathname.
+    # @param compiler [Compiler]
+    def initialize(name, output, compiler)
+      @name     = name
+      @output   = Pathname.new(output)
+      @compiler = compiler
+    end
+    # Extra options from the compiler.  This can be used by
+    # generators for output information.
+    #
+    # @return [Hash]
+    def options
+      compiler.options[:extra]
+    end
+  end
+end

data/lib/antelope/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Antelope
   # The current running version of antelope.
-  VERSION = "0.2.4".freeze
+  VERSION = '0.3.0'.freeze
 end