RubyGems - stamina - Versions diffs - 0.3.1 → 0.4.0 - Mend

stamina 0.3.1 → 0.4.0

Files changed (71) hide show

data/CHANGELOG.md +24 -0
data/Gemfile.lock +5 -1
data/bin/stamina +10 -0
data/lib/stamina.rb +2 -1
data/lib/stamina/abbadingo.rb +2 -0
data/lib/stamina/abbadingo/random_dfa.rb +48 -0
data/lib/stamina/abbadingo/random_sample.rb +146 -0
data/lib/stamina/adl.rb +6 -6
data/lib/stamina/automaton.rb +29 -4
data/lib/stamina/automaton/complete.rb +36 -0
data/lib/stamina/automaton/equivalence.rb +55 -0
data/lib/stamina/automaton/metrics.rb +8 -1
data/lib/stamina/automaton/minimize.rb +25 -0
data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
data/lib/stamina/automaton/strip.rb +16 -0
data/lib/stamina/automaton/walking.rb +46 -19
data/lib/stamina/command.rb +45 -0
data/lib/stamina/command/abbadingo_dfa.rb +81 -0
data/lib/stamina/command/abbadingo_samples.rb +40 -0
data/lib/stamina/command/adl2dot.rb +71 -0
data/lib/stamina/command/classify.rb +48 -0
data/lib/stamina/command/help.rb +27 -0
data/lib/stamina/command/infer.rb +141 -0
data/lib/stamina/command/metrics.rb +51 -0
data/lib/stamina/command/robustness.rb +22 -0
data/lib/stamina/command/score.rb +35 -0
data/lib/stamina/errors.rb +4 -1
data/lib/stamina/ext/math.rb +20 -0
data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
data/lib/stamina/induction/commons.rb +32 -46
data/lib/stamina/induction/rpni.rb +7 -9
data/lib/stamina/induction/union_find.rb +3 -3
data/lib/stamina/loader.rb +1 -0
data/lib/stamina/sample.rb +79 -2
data/lib/stamina/scoring.rb +37 -0
data/lib/stamina/version.rb +2 -2
data/stamina.gemspec +2 -1
data/stamina.noespec +9 -12
data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
data/test/stamina/abbadingo/random_sample_test.rb +78 -0
data/test/stamina/adl_test.rb +27 -2
data/test/stamina/automaton/complete_test.rb +58 -0
data/test/stamina/automaton/equivalence_test.rb +120 -0
data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
data/test/stamina/automaton/strip_test.rb +36 -0
data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
data/test/stamina/automaton_test.rb +13 -1
data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
data/test/stamina/sample_test.rb +75 -0
data/test/stamina/stamina_test.rb +13 -2
metadata +98 -23
data/bin/adl2dot +0 -12
data/bin/classify +0 -12
data/bin/redblue +0 -12
data/bin/rpni +0 -12
data/lib/stamina/command/adl2dot_command.rb +0 -73
data/lib/stamina/command/classify_command.rb +0 -57
data/lib/stamina/command/redblue_command.rb +0 -58
data/lib/stamina/command/rpni_command.rb +0 -58
data/lib/stamina/command/stamina_command.rb +0 -79

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,27 @@
+# 0.4.0 / FIX ME
+* Major Enhancements
+    * Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
+    * Added Sample#to_pta taken from Induction::Commons
+    * Added Automaton completion (all strings parsable) under Automaton#complete[!?]
+    * Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
+    * Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
+    * Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
+    * Added a main 'stamina' command relying on Quickl. classify/adl2dot commands become
+      subcommands of stamina itself (see stamina --help for a list of available commands).
+      Induction commands (rpni and redblue) are now handled by 'stamina infer' with
+      options.
+    * Error states are now correctly handled in ADL::parse and ADL::flush
+    * RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
+* Minor Enhancements
+    * Added a few optimizations here and there
+* Bug fixes
+    * Fixed a bug in Automaton#depth when some states are unreachable
 # 0.3.1 / 2011-03-24
 * Major Enhancements

data/Gemfile.lock CHANGED Viewed

@@ -1,13 +1,16 @@
 PATH
   remote: .
   specs:
-    stamina (0.3.1)
+    stamina (0.4.0)
+      quickl (~> 0.2.0)
 GEM
   remote: http://rubygems.org/
   specs:
     bluecloth (2.0.11)
     diff-lcs (1.1.2)
+    gnuplot (2.3.6)
+    quickl (0.2.0)
     rake (0.8.7)
     rspec (2.4.0)
       rspec-core (~> 2.4.0)
@@ -26,6 +29,7 @@ PLATFORMS
 DEPENDENCIES
   bluecloth (~> 2.0.9)
   bundler (~> 1.0)
+  gnuplot (~> 2.3.6)
   rake (~> 0.8.7)
   rspec (~> 2.4.0)
   stamina!

data/bin/stamina ADDED Viewed

@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'bundler'
+Bundler.setup(:default)
+$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
+require "stamina/command"
+Stamina::Command.run(ARGV, __FILE__)

data/lib/stamina.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 module Stamina
 end
+require 'stamina/ext/math'
 require 'stamina/version'
 require 'stamina/loader'
 require 'set'
@@ -18,4 +19,4 @@ require 'stamina/utils'
 require 'stamina/induction/union_find'
 require 'stamina/induction/commons'
 require "stamina/induction/rpni"
-require "stamina/induction/redblue"
+require "stamina/induction/blue_fringe"

data/lib/stamina/abbadingo.rb ADDED Viewed

@@ -0,0 +1,2 @@
+require 'stamina/abbadingo/random_dfa'
+require 'stamina/abbadingo/random_sample'

data/lib/stamina/abbadingo/random_dfa.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Stamina
+  module Abbadingo
+    #
+    # Generates a random DFA using the Abbadingo protocol.
+    #
+    class RandomDFA
+      # Number of wished states
+      attr_reader :state_count
+      # Accepting ratio
+      attr_reader :accepting_ratio
+      # Creates an algorithm instance with default options
+      def initialize(state_count = 64, accepting_ratio = 0.5)
+        @state_count = state_count
+        @accepting_ratio = accepting_ratio
+      end
+      def execute
+        dfa = Automaton.new
+        # Generate 5/4*state_count states
+        (state_count.to_f * 5.0 / 4.0).to_i.times do
+          dfa.add_state(:initial   => false,
+                        :accepting => (Kernel.rand <= accepting_ratio),
+                        :error     => false)
+        end
+        # Generate all edges
+        dfa.each_state do |source|
+          ["0", "1"].each do |symbol|
+            target = dfa.ith_state(Kernel.rand(dfa.state_count))
+            dfa.connect(source, target, symbol)
+          end
+        end
+        # Choose an initial state
+        dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
+        # Minimize the automaton and return it
+        Stamina::Automaton::Minimize::Pitchies.execute(dfa)
+      end
+    end # class RandomDFA
+  end # module Abbadingo
+end # module Stamina

data/lib/stamina/abbadingo/random_sample.rb ADDED Viewed

@@ -0,0 +1,146 @@
+module Stamina
+  module Abbadingo
+    #
+    # Generates a random Sample using the Abbadingo protocol.
+    #
+    class RandomSample
+      #
+      # Implements an enumerator for binary strings whose length lies between 0
+      # and max_length (passed at construction).
+      #
+      # The enumerator guarantees that strings are sampled with a uniform distribution
+      # among them. As the number of strings of a given length is an exponential
+      # function, this means that you've got 50% chance of having a string of length
+      # max_length, 25% of max_length - 1, 12.5% of max_length - 2 and so on.
+      #
+      # How to use it?
+      #
+      #   # create for strings between 0 and 10 symbols, inclusive
+      #   enum = Stamina::Abbadingo::StringEnumerator.new(10)
+      #
+      #   # this is how to generate strings while a predicate is true
+      #   enum.each do |s|
+      #     # s is an array of binary integer symbols (0 or 1)
+      #     # true for continuing, false otherwise
+      #     return (true || false)
+      #   end
+      #
+      #   # this is how to generate a fixed number of strings
+      #   (1..1000).collect{ enum.one }
+      #
+      # How does it work? Well, the distribution of strings is as follows:
+      #
+      #    length     [n]b_strings        [c]umul       log2(n)         log2(c)    log2(c).floor
+      #                   (2**n)         2**(n+1)-1
+      #      0               1               1       0.0000000000       0.000000        0
+      #      1               2               3       1.0000000000       1.584963        1
+      #      2               4               7       2.0000000000       2.807355        2
+      #      3               8              15       3.0000000000       3.906891        3
+      #      4              16              31       4.0000000000       4.954196        4
+      #      5              32              63       5.0000000000       5.977280        5
+      #
+      # where _cumul_ is the total number of strings up to _length_ symbols.
+      #
+      # Therefore, the idea is to see each string as having an identifier, say _x_,
+      # between 1 and 2**(max_length+1)-1 (see max).
+      #   * The length of the _x_th string is log2(x).floor (see length_for)
+      #   * The string itself is the binary decomposition of x, up to length_for(x)
+      #     symbols (see string_for)
+      #
+      # As those identifiers naturally respect the exponential distribution, sampling
+      # the strings is the same as taking string_for(x) for random x up to _max_.
+      #
+      class StringEnumerator
+        include Enumerable
+        # Maximal length of a string
+        attr_reader :max_length
+        def initialize(max_length = 16)
+          @max_length = max_length
+        end
+        #
+        # Returns the length of the string whose identifier is _x_ (> 0)
+        #
+        def length_for(x)
+          Math.log2(x).floor
+        end
+        #
+        # Returns the binary string whose identifier is _x_ (> 0)
+        #
+        def string_for(x)
+          length = length_for(x)
+          (0..length-1).collect{|i| ((x >> i) % 2).to_s}
+        end
+        #
+        # Returns the maximum identifier, which is also the number of strings
+        # up to max_length symbols
+        #
+        def max
+          @max ||= 2 ** (max_length+1) - 1
+        end
+        #
+        # Generates a string at random
+        #
+        def one
+          string_for(1+Kernel.rand(max))
+        end
+        #
+        # Yields the block with a random string, until the block returns false
+        # or nil.
+        #
+        def each
+          begin
+            cont = yield(one)
+          end while cont
+        end
+      end # class StringEnumerator
+      #
+      # Generates a training Sample and a test Sample from strings randomly drawn
+      # with a uniform distribution over all strings up to _max_length_ symbols
+      #
+      def self.execute(classifier, max_length = classifier.depth + 3)
+        enum = StringEnumerator.new(max_length)
+        # We generate 1800 strings for the test set plus n^2/2 strings for
+        # the training set. If there are not enough strings available, we generate
+        # the maximum we can
+        seen = {}
+        nb = Math.min(1800 + (classifier.state_count**2), enum.max)
+        # Let's go now
+        enum.each do |s|
+          seen[s] = true
+          seen.size < nb
+        end
+        # Make them
+        strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
+        pos, neg = strings.partition{|s| s.positive?}
+        # Split them, 1800 in test and the rest in training set
+        if (pos.size > 900) && (neg.size > 900)
+          pos_test, pos_training = pos[0...900], pos[900..-1]
+          neg_test, neg_training = neg[0...900], neg[900..-1]
+        else
+          pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
+          neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
+        end
+        flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
+        training = (pos_training + neg_training).sort &flusher
+        test = (pos_test + neg_test).sort &flusher
+        [Sample.new(training), Sample.new(test)]
+      end
+    end # class RandomSample
+  end # module Abbadingo
+end # module Stamina

data/lib/stamina/adl.rb CHANGED Viewed

@@ -71,11 +71,11 @@ module Stamina
               # looking for |number initial accepting|
               raise(ADL::ParseError,
                     "Parse error line #{line_number}: state definition expected, "\
-                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
-              id, initial, accepting = $1, $2, $3
-              initial, accepting = ("true"==initial), ("true"==accepting)
+                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
+              id, initial, accepting, error = $1, $2, $3, $5
+              initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
-              state = fa.add_state(:initial => initial, :accepting => accepting)
+              state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
               state[:name]=id.to_s
               states[id] = state
@@ -142,7 +142,7 @@ module Stamina
     def self.print_automaton(fa, buffer="")
       buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
       fa.states.each do |s|
-        buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << "\n"
+        buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
       end
       fa.edges.each do |e|
         buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
@@ -295,4 +295,4 @@ module Stamina
     end
   end # module ADL
-end # module Stamina
+end # module Stamina

data/lib/stamina/automaton.rb CHANGED Viewed

@@ -198,6 +198,15 @@ module Stamina
         (outs.size==@out_edges.size) and not(outs.include?(nil))
       end
+      # Checks if this state is a sink state or not. Sink states are defined as
+      # non accepting states having no outgoing transition or only loop
+      # transitions.
+      def sink?
+        return false if accepting?
+        out_edges.each{|e| return false unless e.target==self}
+        true
+      end
       #
       # Returns an array containing all incoming edges of the state. Edges are
       # sorted if _sorted_ is set to true. If two incoming edges have same symbol
@@ -364,8 +373,8 @@ module Stamina
       #
       def dfa_delta(symbol)
         return nil if symbol.nil?
-        @out_edges.each {|e| return e.target if e.symbol==symbol}
-        return nil
+        edge = @out_edges.find{|e| e.symbol==symbol}
+        edge.nil? ? nil : edge.target
       end
       #
@@ -456,10 +465,14 @@ module Stamina
       end
       # Returns edge symbol.
-      def symbol() @data[:symbol] end
+      def symbol()
+        @data[:symbol]
+      end
       # Sets edge symbol.
-      def symbol=(symbol) @data[:symbol]=symbol end
+      def symbol=(symbol)
+        @data[:symbol] = symbol
+      end
       alias :source :from
       alias :target :to
@@ -1207,6 +1220,14 @@ module Stamina
       end
     end
+    ### public section about adl utilities #######################################
+    public
+    # Prints this automaton in ADL format
+    def to_adl(buffer = "")
+      Stamina::ADL.print_automaton(self, buffer)
+    end
     ### public section about reordering ##########################################
     public
@@ -1235,4 +1256,8 @@ module Stamina
 end # module Stamina
 require 'stamina/automaton/walking'
+require 'stamina/automaton/complete'
+require 'stamina/automaton/strip'
+require 'stamina/automaton/equivalence'
+require 'stamina/automaton/minimize'
 require 'stamina/automaton/metrics'

data/lib/stamina/automaton/complete.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Stamina
+  class Automaton
+    #
+    # Checks if this automaton is complete
+    #
+    def complete?
+      alph = alphabet
+      states.find{|s| !(alphabet - s.out_symbols).empty?}.nil?
+    end
+    #
+    # Returns a completed copy of this automaton
+    #
+    def complete
+      self.dup.complete!
+    end
+    #
+    # Completes this automaton.
+    #
+    def complete!(sink_data = {:initial => false, :accepting => false, :error => false})
+      alph = alphabet
+      sink = add_state(sink_data)
+      each_state do |s|
+        out_symbols = s.out_symbols
+        (alph-out_symbols).each do |symbol|
+          connect(s, sink, symbol)
+        end
+      end
+      drop_state(sink) if sink.adjacent_states == [sink]
+      self
+    end
+  end # class Automaton
+end # module Stamina

data/lib/stamina/automaton/equivalence.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module Stamina
+  class Automaton
+    #
+    # Checks if this automaton is equivalent to another one.
+    #
+    # Automata must be both minimal and complete to guarantee that this method
+    # works.
+    #
+    def equivalent?(other, equiv = nil, key = :equiv_state)
+      equiv ||= Proc.new{|s1,s2| (s1.accepting? == s2.accepting?) &&
+                                 (s1.error? == s2.error?) &&
+                                 (s1.initial? == s2.initial?) }
+      # Both must already have basic attributes in common
+      return false unless state_count==other.state_count
+      return false unless edge_count==other.edge_count
+      return false unless equiv[initial_state, other.initial_state]
+      # We instantiate the decoration algorithm for checking equivalence on this
+      # automaton:
+      #   * decoration is the index of the equivalent state in other automaton
+      #   * d0 is thus 'other.initial_state.index'
+      #   * suppremum is identity and fails when the equivalent state is not unique
+      #   * propagation checks transition function delta
+      #
+      algo = Stamina::Utils::Decorate.new(key)
+      algo.set_suppremum do |d0, d1|
+        if (d0.nil? or d1.nil?)
+           (d0 || d1)
+        elsif d0==d1
+          d0
+        else
+          raise Stamina::Abord
+        end
+      end
+      algo.set_propagate do |d,e|
+        reached = other.ith_state(d).dfa_step(e.symbol)
+        raise Stamina::Abord if reached.nil?
+        raise Stamina::Abord unless equiv[e.target, reached]
+        reached.index
+      end
+      # Run the algorithm now
+      begin
+        algo.execute(self, nil, other.initial_state.index)
+        return true
+      rescue Stamina::Abord
+        return false
+      end
+    end
+    alias :<=> :equivalent?
+  end # class Automaton
+end # module Stamina