RubyGems - stamina - Versions diffs - 0.3.1 → 0.4.0 - Mend

stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

data/CHANGELOG.md +24 -0
data/Gemfile.lock +5 -1
data/bin/stamina +10 -0
data/lib/stamina.rb +2 -1
data/lib/stamina/abbadingo.rb +2 -0
data/lib/stamina/abbadingo/random_dfa.rb +48 -0
data/lib/stamina/abbadingo/random_sample.rb +146 -0
data/lib/stamina/adl.rb +6 -6
data/lib/stamina/automaton.rb +29 -4
data/lib/stamina/automaton/complete.rb +36 -0
data/lib/stamina/automaton/equivalence.rb +55 -0
data/lib/stamina/automaton/metrics.rb +8 -1
data/lib/stamina/automaton/minimize.rb +25 -0
data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
data/lib/stamina/automaton/strip.rb +16 -0
data/lib/stamina/automaton/walking.rb +46 -19
data/lib/stamina/command.rb +45 -0
data/lib/stamina/command/abbadingo_dfa.rb +81 -0
data/lib/stamina/command/abbadingo_samples.rb +40 -0
data/lib/stamina/command/adl2dot.rb +71 -0
data/lib/stamina/command/classify.rb +48 -0
data/lib/stamina/command/help.rb +27 -0
data/lib/stamina/command/infer.rb +141 -0
data/lib/stamina/command/metrics.rb +51 -0
data/lib/stamina/command/robustness.rb +22 -0
data/lib/stamina/command/score.rb +35 -0
data/lib/stamina/errors.rb +4 -1
data/lib/stamina/ext/math.rb +20 -0
data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
data/lib/stamina/induction/commons.rb +32 -46
data/lib/stamina/induction/rpni.rb +7 -9
data/lib/stamina/induction/union_find.rb +3 -3
data/lib/stamina/loader.rb +1 -0
data/lib/stamina/sample.rb +79 -2
data/lib/stamina/scoring.rb +37 -0
data/lib/stamina/version.rb +2 -2
data/stamina.gemspec +2 -1
data/stamina.noespec +9 -12
data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
data/test/stamina/abbadingo/random_sample_test.rb +78 -0
data/test/stamina/adl_test.rb +27 -2
data/test/stamina/automaton/complete_test.rb +58 -0
data/test/stamina/automaton/equivalence_test.rb +120 -0
data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
data/test/stamina/automaton/strip_test.rb +36 -0
data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
data/test/stamina/automaton_test.rb +13 -1
data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
data/test/stamina/sample_test.rb +75 -0
data/test/stamina/stamina_test.rb +13 -2
metadata +98 -23
data/bin/adl2dot +0 -12
data/bin/classify +0 -12
data/bin/redblue +0 -12
data/bin/rpni +0 -12
data/lib/stamina/command/adl2dot_command.rb +0 -73
data/lib/stamina/command/classify_command.rb +0 -57
data/lib/stamina/command/redblue_command.rb +0 -58
data/lib/stamina/command/rpni_command.rb +0 -58
data/lib/stamina/command/stamina_command.rb +0 -79

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,27 @@
+# 0.4.0 / FIX ME
+* Major Enhancements
+    * Added Automaton#to_adl as a shortcut for Stamina::ADL::print_automaton(...)
+    * Added Sample#to_pta taken from Induction::Commons
+    * Added Automaton completion (all strings parsable) under Automaton#complete[!?]
+    * Added Automaton stripping (removal of unreachable states) under Automaton#strip[!]
+    * Added Automaton minimization (Hopcroft + Pitchies) under Automaton#minimize
+    * Added Abbadingo generators under Abbadingo::RandomDFA and Abbadingo::RandomSample
+    * Added a main 'stamina' command relying on Quickl. The classify/adl2dot commands become
+      subcommands of stamina itself (see stamina --help for a list of available commands).
+      Induction commands (rpni and redblue) are now handled by 'stamina infer' with
+      options.
+    * Error states are now correctly handled in ADL::parse and ADL::flush
+    * RedBlue has been renamed as BlueFringe everywhere (red_?blue -> blue_fringe)
+* Minor Enhancements
+    * Added a few optimizations here and there
+* Bug fixes
+    * Fixed a bug in Automaton#depth when some states are unreachable
 # 0.3.1 / 2011-03-24
 * Major Enhancements

data/Gemfile.lock CHANGED Viewed

@@ -1,13 +1,16 @@
 PATH
   remote: .
   specs:
-    stamina (0.3.1)
+    stamina (0.4.0)
+      quickl (~> 0.2.0)
 GEM
   remote: http://rubygems.org/
   specs:
     bluecloth (2.0.11)
     diff-lcs (1.1.2)
+    gnuplot (2.3.6)
+    quickl (0.2.0)
     rake (0.8.7)
     rspec (2.4.0)
       rspec-core (~> 2.4.0)
@@ -26,6 +29,7 @@ PLATFORMS
 DEPENDENCIES
   bluecloth (~> 2.0.9)
   bundler (~> 1.0)
+  gnuplot (~> 2.3.6)
   rake (~> 0.8.7)
   rspec (~> 2.4.0)
   stamina!

data/bin/stamina ADDED Viewed

@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'bundler'
+Bundler.setup(:default)
+$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
+require "stamina/command"
+Stamina::Command.run(ARGV, __FILE__)

data/lib/stamina.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 module Stamina
 end
+require 'stamina/ext/math'
 require 'stamina/version'
 require 'stamina/loader'
 require 'set'
@@ -18,4 +19,4 @@ require 'stamina/utils'
 require 'stamina/induction/union_find'
 require 'stamina/induction/commons'
 require "stamina/induction/rpni"
-require "stamina/induction/redblue"
+require "stamina/induction/blue_fringe"

data/lib/stamina/abbadingo.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ require 'stamina/abbadingo/random_dfa'
2	+ require 'stamina/abbadingo/random_sample'

data/lib/stamina/abbadingo/random_dfa.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Stamina
+  module Abbadingo
+    #
+    # Generates a random DFA using the Abbadingo protocol.
+    #
+    class RandomDFA
+      # Desired number of states
+      attr_reader :state_count
+      # Accepting ratio
+      attr_reader :accepting_ratio
+      # Creates an algorithm instance with default options
+      def initialize(state_count = 64, accepting_ratio = 0.5)
+        @state_count = state_count
+        @accepting_ratio = accepting_ratio
+      end
+      def execute
+        dfa = Automaton.new
+        # Generate 5/4*state_count states
+        (state_count.to_f * 5.0 / 4.0).to_i.times do
+          dfa.add_state(:initial   => false,
+                        :accepting => (Kernel.rand <= accepting_ratio),
+                        :error     => false)
+        end
+        # Generate all edges
+        dfa.each_state do |source|
+          ["0", "1"].each do |symbol|
+            target = dfa.ith_state(Kernel.rand(dfa.state_count))
+            dfa.connect(source, target, symbol)
+          end
+        end
+        # Choose an initial state
+        dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
+        # Minimize the automaton and return it
+        Stamina::Automaton::Minimize::Pitchies.execute(dfa)
+      end
+    end # class RandomDFA
+  end # module Abbadingo
+end # module Stamina

data/lib/stamina/abbadingo/random_sample.rb ADDED Viewed

@@ -0,0 +1,146 @@
+module Stamina
+  module Abbadingo
+    #
+    # Generates a random Sample using the Abbadingo protocol.
+    #
+    class RandomSample
+      #
+      # Implements an enumerator for binary strings whose length lies between 0
+      # and max_length (passed at construction).
+      #
+      # The enumerator guarantees that strings are sampled with a uniform distribution
+      # among them. As the number of strings of a given length is an exponential
+      # function, this means that you've got a 50% chance of having a string of length
+      # max_length, 25% of max_length - 1, 12.5% of max_length - 2 and so on.
+      #
+      # How to use it?
+      #
+      #   # create for strings between 0 and 10 symbols, inclusive
+      #   enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
+      #
+      #   # this is how to generate strings while a predicate is true
+      #   enum.each do |s|
+      #     # s is an array of binary integer symbols (0 or 1)
+      #     # true for continuing, false otherwise
+      #     return (true || false)
+      #   end
+      #
+      #   # this is how to generate a fixed number of strings
+      #   (1..1000).collect{ enum.one }
+      #
+      # How does it work? Well, the distribution of strings is as follows:
+      #
+      #    length     [n]b_strings        [c]umul       log2(n)         log2(c)    log2(c).floor
+      #                   (2**n)         2**(n+1)-1
+      #      0               1               1       0.0000000000       0.000000        0
+      #      1               2               3       1.0000000000       1.584963        1
+      #      2               4               7       2.0000000000       2.807355        2
+      #      3               8              15       3.0000000000       3.906891        3
+      #      4              16              31       4.0000000000       4.954196        4
+      #      5              32              63       5.0000000000       5.977280        5
+      #
+      # where _cumul_ is the total number of strings up to _length_ symbols.
+      #
+      # Therefore, the idea is to see each string as having an identifier, say _x_,
+      # between 1 and 2**(max_length+1)-1 (see max).
+      #   * The length of the _x_th string is log2(x).floor (see length_for)
+      #   * The string itself is the binary decomposition of x, up to length_for(x)
+      #     symbols (see string_for)
+      #
+      # As those identifiers naturally respect the exponential distribution, sampling
+      # the strings is the same as taking string_for(x) for random x up to _max_.
+      #
+      class StringEnumerator
+        include Enumerable
+        # Maximal length of a string
+        attr_reader :max_length
+        def initialize(max_length = 16)
+          @max_length = max_length
+        end
+        #
+        # Returns the length of the string whose identifier is _x_ (> 0)
+        #
+        def length_for(x)
+          Math.log2(x).floor
+        end
+        #
+        # Returns the binary string whose identifier is _x_ (> 0)
+        #
+        def string_for(x)
+          length = length_for(x)
+          (0..length-1).collect{|i| ((x >> i) % 2).to_s}
+        end
+        #
+        # Returns the maximum identifier, which is also the number of strings
+        # up to max_length symbols
+        #
+        def max
+          @max ||= 2 ** (max_length+1) - 1
+        end
+        #
+        # Generates a string at random
+        #
+        def one
+          string_for(1+Kernel.rand(max))
+        end
+        #
+        # Yields the block with a random string, until the block returns false
+        # or nil.
+        #
+        def each
+          begin
+            cont = yield(one)
+          end while cont
+        end
+      end # class StringEnumerator
+      #
+      # Generates [training, test] Sample instances from _nb_ strings randomly sampled
+      # with a uniform distribution over all strings up to _max_length_ symbols
+      #
+      def self.execute(classifier, max_length = classifier.depth + 3)
+        enum = StringEnumerator.new(max_length)
+        # We generate 1800 strings for the test set plus n^2 strings for
+        # the training set (n being the classifier's state count). If there are
+        # not enough strings available, we generate the maximum we can
+        seen = {}
+        nb = Math.min(1800 + (classifier.state_count**2), enum.max)
+        # Let's go now
+        enum.each do |s|
+          seen[s] = true
+          seen.size < nb
+        end
+        # Make them
+        strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
+        pos, neg = strings.partition{|s| s.positive?}
+        # Split them, 1800 in test and the rest in training set
+        if (pos.size > 900) && (neg.size > 900)
+          pos_test, pos_training = pos[0...900], pos[900..-1]
+          neg_test, neg_training = neg[0...900], neg[900..-1]
+        else
+          pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
+          neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
+        end
+        flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
+        training = (pos_training + neg_training).sort &flusher
+        test = (pos_test + neg_test).sort &flusher
+        [Sample.new(training), Sample.new(test)]
+      end
+    end # class RandomSample
+  end # module Abbadingo
+end # module Stamina

data/lib/stamina/adl.rb CHANGED Viewed

@@ -71,11 +71,11 @@ module Stamina
               # looking for |number initial accepting|
               raise(ADL::ParseError,
                     "Parse error line #{line_number}: state definition expected, "\
-                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
-              id, initial, accepting = $1, $2, $3
-              initial, accepting = ("true"==initial), ("true"==accepting)
+                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
+              id, initial, accepting, error = $1, $2, $3, $5
+              initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
-              state = fa.add_state(:initial => initial, :accepting => accepting)
+              state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
               state[:name]=id.to_s
               states[id] = state
@@ -142,7 +142,7 @@ module Stamina
     def self.print_automaton(fa, buffer="")
       buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
       fa.states.each do |s|
-        buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << "\n"
+        buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
       end
       fa.edges.each do |e|
         buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
@@ -295,4 +295,4 @@ module Stamina
     end
   end # module ADL
-end # module Stamina
+end # module Stamina

data/lib/stamina/automaton.rb CHANGED Viewed

@@ -198,6 +198,15 @@ module Stamina
         (outs.size==@out_edges.size) and not(outs.include?(nil))
       end
+      # Checks if this state is a sink state or not. Sink states are defined as
+      # non accepting states having no outgoing transition or only loop
+      # transitions.
+      def sink?
+        return false if accepting?
+        out_edges.each{|e| return false unless e.target==self}
+        true
+      end
       #
       # Returns an array containing all incoming edges of the state. Edges are
       # sorted if _sorted_ is set to true. If two incoming edges have same symbol
@@ -364,8 +373,8 @@ module Stamina
       #
       def dfa_delta(symbol)
         return nil if symbol.nil?
-        @out_edges.each {|e| return e.target if e.symbol==symbol}
-        return nil
+        edge = @out_edges.find{|e| e.symbol==symbol}
+        edge.nil? ? nil : edge.target
       end
       #
@@ -456,10 +465,14 @@ module Stamina
       end
       # Returns edge symbol.
-      def symbol() @data[:symbol] end
+      def symbol()
+        @data[:symbol]
+      end
       # Sets edge symbol.
-      def symbol=(symbol) @data[:symbol]=symbol end
+      def symbol=(symbol)
+        @data[:symbol] = symbol
+      end
       alias :source :from
       alias :target :to
@@ -1207,6 +1220,14 @@ module Stamina
       end
     end
+    ### public section about adl utilities #######################################
+    public
+    # Prints this automaton in ADL format
+    def to_adl(buffer = "")
+      Stamina::ADL.print_automaton(self, buffer)
+    end
     ### public section about reordering ##########################################
     public
@@ -1235,4 +1256,8 @@ module Stamina
 end # module Stamina
 require 'stamina/automaton/walking'
+require 'stamina/automaton/complete'
+require 'stamina/automaton/strip'
+require 'stamina/automaton/equivalence'
+require 'stamina/automaton/minimize'
 require 'stamina/automaton/metrics'

data/lib/stamina/automaton/complete.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Stamina
+  class Automaton
+    #
+    # Checks if this automaton is complete
+    #
+    def complete?
+      alph = alphabet
+      states.find{|s| !(alphabet - s.out_symbols).empty?}.nil?
+    end
+    #
+    # Returns a completed copy of this automaton
+    #
+    def complete
+      self.dup.complete!
+    end
+    #
+    # Completes this automaton.
+    #
+    def complete!(sink_data = {:initial => false, :accepting => false, :error => false})
+      alph = alphabet
+      sink = add_state(sink_data)
+      each_state do |s|
+        out_symbols = s.out_symbols
+        (alph-out_symbols).each do |symbol|
+          connect(s, sink, symbol)
+        end
+      end
+      drop_state(sink) if sink.adjacent_states == [sink]
+      self
+    end
+  end # class Automaton
+end # module Stamina

data/lib/stamina/automaton/equivalence.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module Stamina
+  class Automaton
+    #
+    # Checks if this automaton is equivalent to another one.
+    #
+    # Automata must be both minimal and complete to guarantee that this method
+    # works.
+    #
+    def equivalent?(other, equiv = nil, key = :equiv_state)
+      equiv ||= Proc.new{|s1,s2| (s1.accepting? == s2.accepting?) &&
+                                 (s1.error? == s2.error?) &&
+                                 (s1.initial? == s2.initial?) }
+      # Both must already have basic attributes in common
+      return false unless state_count==other.state_count
+      return false unless edge_count==other.edge_count
+      return false unless equiv[initial_state, other.initial_state]
+      # We instantiate the decoration algorithm for checking equivalence on this
+      # automaton:
+      #   * decoration is the index of the equivalent state in other automaton
+      #   * d0 is thus 'other.initial_state.index'
+      #   * suppremum is identity and fails when the equivalent state is not unique
+      #   * propagation checks transition function delta
+      #
+      algo = Stamina::Utils::Decorate.new(key)
+      algo.set_suppremum do |d0, d1|
+        if (d0.nil? or d1.nil?)
+           (d0 || d1)
+        elsif d0==d1
+          d0
+        else
+          raise Stamina::Abord
+        end
+      end
+      algo.set_propagate do |d,e|
+        reached = other.ith_state(d).dfa_step(e.symbol)
+        raise Stamina::Abord if reached.nil?
+        raise Stamina::Abord unless equiv[e.target, reached]
+        reached.index
+      end
+      # Run the algorithm now
+      begin
+        algo.execute(self, nil, other.initial_state.index)
+        return true
+      rescue Stamina::Abord
+        return false
+      end
+    end
+    alias :<=> :equivalent?
+  end # class Automaton
+end # module Stamina