RubyGems - stamina - Versions diffs - 0.3.1 → 0.4.0 - Mend

stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

data/CHANGELOG.md +24 -0
data/Gemfile.lock +5 -1
data/bin/stamina +10 -0
data/lib/stamina.rb +2 -1
data/lib/stamina/abbadingo.rb +2 -0
data/lib/stamina/abbadingo/random_dfa.rb +48 -0
data/lib/stamina/abbadingo/random_sample.rb +146 -0
data/lib/stamina/adl.rb +6 -6
data/lib/stamina/automaton.rb +29 -4
data/lib/stamina/automaton/complete.rb +36 -0
data/lib/stamina/automaton/equivalence.rb +55 -0
data/lib/stamina/automaton/metrics.rb +8 -1
data/lib/stamina/automaton/minimize.rb +25 -0
data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
data/lib/stamina/automaton/strip.rb +16 -0
data/lib/stamina/automaton/walking.rb +46 -19
data/lib/stamina/command.rb +45 -0
data/lib/stamina/command/abbadingo_dfa.rb +81 -0
data/lib/stamina/command/abbadingo_samples.rb +40 -0
data/lib/stamina/command/adl2dot.rb +71 -0
data/lib/stamina/command/classify.rb +48 -0
data/lib/stamina/command/help.rb +27 -0
data/lib/stamina/command/infer.rb +141 -0
data/lib/stamina/command/metrics.rb +51 -0
data/lib/stamina/command/robustness.rb +22 -0
data/lib/stamina/command/score.rb +35 -0
data/lib/stamina/errors.rb +4 -1
data/lib/stamina/ext/math.rb +20 -0
data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
data/lib/stamina/induction/commons.rb +32 -46
data/lib/stamina/induction/rpni.rb +7 -9
data/lib/stamina/induction/union_find.rb +3 -3
data/lib/stamina/loader.rb +1 -0
data/lib/stamina/sample.rb +79 -2
data/lib/stamina/scoring.rb +37 -0
data/lib/stamina/version.rb +2 -2
data/stamina.gemspec +2 -1
data/stamina.noespec +9 -12
data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
data/test/stamina/abbadingo/random_sample_test.rb +78 -0
data/test/stamina/adl_test.rb +27 -2
data/test/stamina/automaton/complete_test.rb +58 -0
data/test/stamina/automaton/equivalence_test.rb +120 -0
data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
data/test/stamina/automaton/strip_test.rb +36 -0
data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
data/test/stamina/automaton_test.rb +13 -1
data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
data/test/stamina/sample_test.rb +75 -0
data/test/stamina/stamina_test.rb +13 -2
metadata +98 -23
data/bin/adl2dot +0 -12
data/bin/classify +0 -12
data/bin/redblue +0 -12
data/bin/rpni +0 -12
data/lib/stamina/command/adl2dot_command.rb +0 -73
data/lib/stamina/command/classify_command.rb +0 -57
data/lib/stamina/command/redblue_command.rb +0 -58
data/lib/stamina/command/rpni_command.rb +0 -58
data/lib/stamina/command/stamina_command.rb +0 -79

data/lib/stamina/errors.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 module Stamina
+  # Raised when an algorithm explicitely abords something
+  class Abord < StandardError; end
   # Main class of all stamina errors.
   class StaminaError < StandardError; end
@@ -17,4 +20,4 @@ module Stamina
   end
-end # module Stamina
+end # module Stamina

data/lib/stamina/ext/math.rb ADDED Viewed

@@ -0,0 +1,20 @@
+if RUBY_VERSION < "1.9"
+  def Math.log2( x )
+    Math.log( x ) / Math.log( 2 )
+  end
+  def Math.logn( x, n )
+    Math.log( x ) / Math.log( n )
+  end
+end
+def Math.max(i, j)
+  i > j ? i : j
+end
+def Math.min(i, j)
+  i < j ? i : j
+end

data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} RENAMED Viewed

@@ -2,7 +2,7 @@ module Stamina
   module Induction
     #
-    # Implementation of the RedBlue variant of the RPNI algorithm (with the blue-fringe
+    # Implementation of the BlueFringe variant of the RPNI algorithm (with the blue-fringe
     # heuristics).
     #
     # See Lang, K., B. Pearlmutter, andR. Price. 1998. Results of the Abbadingo One DFA
@@ -13,34 +13,31 @@ module Stamina
     #   # sample typically comes from an ADL file
     #   sample = Stamina::ADL.parse_sample_file('sample.adl')
     #
-    #   # let RedBlue build the smallest dfa
-    #   dfa = Stamina::Induction::RedBlue.execute(sample, {:verbose => true})
+    #   # let BlueFringe build the smallest dfa
+    #   dfa = Stamina::Induction::BlueFringe.execute(sample, {:verbose => true})
     #
     # Remarks:
     # - Constructor and instance methods of this class are public but not intended
     #   to be used directly. They are left public for testing purposes only.
-    # - Having read the Stamina::Induction::RedBlue base algorithm may help undertanding
+    # - Having read the Stamina::Induction::BlueFringe base algorithm may help undertanding
     #   this variant.
     # - This class intensively uses the Stamina::Induction::UnionFind class and
     #   methods defined in the Stamina::Induction::Commons module which are worth
     #   reading to understand the algorithm implementation.
     #
-    class RedBlue
+    class BlueFringe
       include Stamina::Induction::Commons
       # Union-find data structure used internally
       attr_reader :ufds
-      # Additional options of the algorithm
-      attr_reader :options
-      #
-      # Creates an algorithm instance with specific options
-      #
+      # Creates an algorithm instance with given options.
       def initialize(options={})
-        @options = options
+        raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
+        @options = DEFAULT_OPTIONS.merge(options)
+        @score_cache = {}
       end
       #
       # Computes the score of a single (group) merge. Returned value is 1 if both are
       # accepting states or both are error states and 0 otherwise. Note that d1 and d2
@@ -123,13 +120,16 @@ module Stamina
       #   been evaluated and is then seen unchanged by the caller.
       #
       def merge_and_determinize_score(i, j)
-        # score the merging, always rollback the transaction
-        score = nil
-        @ufds.transactional do
-          score = merge_and_determinize(i, j)
-          false
+        score = @score_cache[[i,j]] ||= begin
+          # score the merging, always rollback the transaction
+          score = nil
+          @ufds.transactional do
+            score = merge_and_determinize(i, j)
+            false
+          end
+          score || -1
         end
-        score
+        score == -1 ? nil : score
       end
       #
@@ -163,8 +163,8 @@ module Stamina
       #   sample are correctly classified by it.
       #
       def main(ufds)
-        puts "Starting RedBlue (#{ufds.size} states)" if @options[:verbose]
-        @ufds, @kernel = ufds, [0]
+        info("Starting BlueFringe (#{ufds.size} states)")
+        @ufds, @kernel, @score_cache = ufds, [0], {}
         # we do it until the fringe is empty (compute it only once each step)
         until (the_fringe=fringe).empty?
@@ -196,15 +196,16 @@ module Stamina
           # If not found, the last candidate must be consolidated. Otherwise, we
           # do the best merging
           unless to_consolidate.nil?
-            puts "Consolidation of #{to_consolidate}" if @options[:verbose]
+            info("Consolidation of #{to_consolidate}")
             @kernel << to_consolidate
           else
-            puts "Merging #{best[0]} and #{best[1]} [#{best[2]}]" if @options[:verbose]
+            @score_cache.clear
+            info("Merging #{best[0]} and #{best[1]} [#{best[2]}]")
             # this one should never fail because its score was positive before
             raise "Unexpected case" unless merge_and_determinize(best[0], best[1])
           end
-          # redblue does not guarantee that it will not merge a state of lower rank
+          # blue_fringe does not guarantee that it will not merge a state of lower rank
           # with a kernel state. The kernel should then be update at each step to keep
           # lowest indices for the whole kernel, and we sort it
           @kernel = @kernel.collect{|k| @ufds.find(k)}.sort
@@ -226,13 +227,13 @@ module Stamina
       #   given as input.
       #
       # Remarks:
-      # - This instance version of RedBlue.execute is not intended to be used directly and
+      # - This instance version of BlueFringe.execute is not intended to be used directly and
       #   is mainly provided for testing purposes. Please use the class variant of this
       #   method if possible.
       #
       def execute(sample)
         # create union-find
-        puts "Creating PTA and UnionFind structure" if @options[:verbose]
+        info("Creating PTA and UnionFind structure")
         ufds = sample2ufds(sample)
         # refine it
         ufds = main(ufds)
@@ -255,10 +256,10 @@ module Stamina
       #   given as input.
       #
       def self.execute(sample, options={})
-        RedBlue.new(options).execute(sample)
+        BlueFringe.new(options).execute(sample)
       end
-    end # class RedBlue
+    end # class BlueFringe
   end # module Induction
 end # module Stamina

data/lib/stamina/induction/commons.rb CHANGED Viewed

@@ -2,20 +2,45 @@ module Stamina
   module Induction
     #
-    # Defines common utilities used by rpni and redblue. About acronyms:
+    # Defines common utilities used by rpni and blue_fringe. About acronyms:
     # - _pta_ stands for Prefix Tree Acceptor
     # - _ufds_ stands for Union-Find Data Structure
     #
-    # Methods pta2ufds, sample2pta and sample2ufds are simply conversion methods used
-    # when the induction algorithm starts (executed on a sample, it first built a pta
-    # then convert it to a union find). Method ufds2pta is used when the algorithm ends,
-    # to convert refined union find to a dfa.
+    # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
+    # algorithm starts (executed on a sample, it first built a pta then convert it to a union
+    # find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
+    # a dfa.
     #
     # The merge_user_data method is probably the most important as it actually computes
     # the merging of two states and build information about merging for determinization.
     #
     module Commons
+      DEFAULT_OPTIONS = {
+        :verbose    => false,
+        :verbose_io => $stderr
+      }
+      # Additional options of the algorithm
+      attr_reader :options
+      # Is the verbose mode on ?
+      def verbose?
+        @verbose ||= !!options[:verbose]
+      end
+      def verbose_io
+        @verbose_io ||= options[:verbose_io] || $stderr
+      end
+      # Display an information message (when verbose)
+      def info(msg)
+        if verbose?
+          verbose_io << msg << "\n"
+          verbose_io.flush
+        end
+      end
       #
       # Factors and returns a UnionFind data structure from a PTA, keeping natural order
       # of its states for union-find elements. The resulting UnionFind contains a Hash as
@@ -47,46 +72,7 @@ module Stamina
       # non accepting and error.
       #
       def sample2pta(sample)
-        Automaton.new do |pta|
-          initial_state = add_state(:initial => true, :accepting => false)
-          # Fill the PTA with each string
-          sample.each do |str|
-            # split string using the dfa
-            parsed, reached, remaining = pta.dfa_split(str, initial_state)
-            # remaining symbols are not empty -> build the PTA
-            unless remaining.empty?
-              remaining.each do |symbol|
-                newone = pta.add_state(:initial => false, :accepting => false, :error => false)
-                pta.connect(reached, newone, symbol)
-                reached = newone
-              end
-            end
-            # flag state
-            str.positive? ? reached.accepting! : reached.error!
-            # check consistency, should not arrive as Sample does not allow
-            # inconsistencies. Should appear only if _sample_ is not a Sample
-            # instance but some other enumerable.
-            raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
-              if (reached.error? and reached.accepting?)
-          end
-          # Reindex states by applying BFS
-          to_index, index = [initial_state], 0
-          until to_index.empty?
-            state = to_index.shift
-            state[:__index__] = index
-            state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each {|e| to_index << e.target}
-            index += 1
-          end
-          # Force the automaton to reindex
-          pta.order_states{|s0,s1| s0[:__index__]<=>s1[:__index__]}
-          # Remove marks
-          pta.states.each{|s| s.remove_mark(:__index__)}
-        end
+        sample.to_pta
       end
       #
@@ -167,4 +153,4 @@ module Stamina
     end # module Commons
   end # module Induction
-end # module Stamina
+end # module Stamina

data/lib/stamina/induction/rpni.rb CHANGED Viewed

@@ -31,14 +31,12 @@ module Stamina
       # Union-find data structure used internally
       attr_reader :ufds
-      # Additional options of the algorithm
-      attr_reader :options
       # Creates an algorithm instance with given options.
       def initialize(options={})
-        @options = options
+        raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
+        @options = DEFAULT_OPTIONS.merge(options)
       end
       #
       # Merges a state of rank j with a state of lower rank i. This merge method
       # includes merging for determinization.
@@ -118,7 +116,7 @@ module Stamina
       #
       def main(ufds)
         @ufds = ufds
-        puts "Starting RPNI (#{@ufds.size} states)" if @options[:verbose]
+        info("Starting RPNI (#{@ufds.size} states)")
         # First loop, iterating all PTA states
         (1...@ufds.size).each do |i|
           # we ignore those that have been previously merged
@@ -130,7 +128,7 @@ module Stamina
             # simply break the loop if it works!
             success = successfull_merge_or_nothing(i,j)
             if success
-              puts "#{i} and #{j} successfully merged" if @options[:verbose]
+              info("#{i} and #{j} successfully merged")
               break
             end
           end # j loop
@@ -156,7 +154,7 @@ module Stamina
       #
       def execute(sample)
         # create union-find
-        puts "Creating PTA and UnionFind structure" if @options[:verbose]
+        info("Creating PTA and UnionFind structure")
         ufds = sample2ufds(sample)
         # refine it
         ufds = main(ufds)
@@ -185,4 +183,4 @@ module Stamina
     end # class RPNI
   end # module Induction
-end # module Stamina
+end # module Stamina

data/lib/stamina/induction/union_find.rb CHANGED Viewed

@@ -86,7 +86,7 @@ module Stamina
     # == Transactional support
     #
     # The main aim of this UnionFind is to make the implementation induction algorithms
-    # Stamina::Induction::RPNI and Stamina::Induction::RedBlue (sufficiently) efficient,
+    # Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
     # simple and readable. These algorithms rely on a try-and-error strategy are must be
     # able to revert the changes they have made during their last try. The transaction
     # support implemented by this data structure helps them achieving this goal. For this
@@ -129,7 +129,7 @@ module Stamina
         # Duplicates this node, ensuring that future changes will not affect the copy.
         # Please note that the user data itself is not duplicated and is not expected
         # to change. This property (not changing user data) is respected by the RPNI
-        # and RedBlue classes as implemented in this library.
+        # and BlueFringe classes as implemented in this library.
         #
         def dup
           Node.new(@parent, @data)
@@ -374,4 +374,4 @@ module Stamina
     end # class UnionFind
   end # module Induction
-end # module Stamina
+end # module Stamina

data/lib/stamina/loader.rb CHANGED Viewed

@@ -0,0 +1 @@
+require "quickl"

data/lib/stamina/sample.rb CHANGED Viewed

@@ -28,9 +28,10 @@ module Stamina
     #
     # Creates an empty sample.
     #
-    def initialize()
+    def initialize(strings = nil)
       @strings = []
       @size, @positive_count, @negative_count = 0, 0, 0
+      strings.each{|s| self << s } unless strings.nil?
     end
     #
@@ -175,6 +176,16 @@ module Stamina
       end
       signature
     end
+    #
+    # Takes only a given proportion of this sample and returns it as a new Sample.
+    #
+    def take(proportion = 0.5)
+      taken = Stamina::Sample.new
+      each_positive{|s| taken << s if Kernel.rand < proportion}
+      each_negative{|s| taken << s if Kernel.rand < proportion}
+      taken
+    end
     #
     # Prints an ADL description of this sample on the buffer.
@@ -184,7 +195,73 @@ module Stamina
     end
     alias :to_s :to_adl
     alias :inspect :to_adl
+    #
+    # Converts a Sample to an (augmented) prefix tree acceptor. This method ensures
+    # that the states of the PTA are in lexical order, according to the <code><=></code>
+    # operator defined on symbols. States reached by negative strings are tagged as
+    # non accepting and error.
+    #
+    def self.to_pta(sample)
+      thepta = Automaton.new do |pta|
+        initial_state = add_state(:initial => true, :accepting => false)
+        # Fill the PTA with each string
+        sample.each do |str|
+          # split string using the dfa
+          parsed, reached, remaining = pta.dfa_split(str, initial_state)
-  end # class Sample
+          # remaining symbols are not empty -> build the PTA
+          unless remaining.empty?
+            remaining.each do |symbol|
+              newone = pta.add_state(:initial => false, :accepting => false, :error => false)
+              pta.connect(reached, newone, symbol)
+              reached = newone
+            end
+          end
+          # flag state
+          str.positive? ? reached.accepting! : reached.error!
+          # check consistency, should not arrive as Sample does not allow
+          # inconsistencies. Should appear only if _sample_ is not a Sample
+          # instance but some other enumerable.
+          raise(InconsistencyError, "Inconsistent sample on #{str}", caller)\
+            if (reached.error? and reached.accepting?)
+        end
+        # Reindex states by applying BFS
+        to_index, index = [initial_state], 0
+        until to_index.empty?
+          state = to_index.shift
+          state[:__index__] = index
+          state.out_edges.sort{|e,f| e.symbol<=>f.symbol}.each{|e| to_index << e.target}
+          index += 1
+        end
+      end
+      # Now we rebuild a fresh one with states in order.
+      # This look more efficient that reordering states of the PTA
+      Automaton.new do |ordered|
+        ordered.add_n_states(thepta.state_count)
+        thepta.each_state do |pta_state|
+          source = ordered.ith_state(pta_state[:__index__])
+          source.initial!   if pta_state.initial?
+          source.accepting! if pta_state.accepting?
+          source.error!     if pta_state.error?
+          pta_state.out_edges.each do |e|
+            target = ordered.ith_state(e.target[:__index__])
+            ordered.connect(source, target, e.symbol)
+          end
+        end
+      end
+    end
+    # Convenient shortcut for Sample.to_pta(sample_instance)
+    def to_pta
+      Sample.to_pta(self)
+    end
+  end # class Sample
 end # module Stamina