RubyGems - stamina - Versions diffs - 0.4.0 → 0.5.0 - Mend

stamina 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

data/CHANGELOG.md +22 -5
data/LICENCE.md +2 -2
data/bin/stamina +1 -7
data/lib/stamina.rb +10 -19
metadata +54 -333
data/.gemtest +0 -0
data/Gemfile +0 -2
data/Gemfile.lock +0 -37
data/Manifest.txt +0 -16
data/README.md +0 -78
data/Rakefile +0 -23
data/example/adl/automaton.adl +0 -49
data/example/adl/sample.adl +0 -53
data/example/basic/characteristic_sample.adl +0 -32
data/example/basic/target.adl +0 -9
data/example/competition/31_test.adl +0 -1500
data/example/competition/31_training.adl +0 -1759
data/lib/stamina/abbadingo.rb +0 -2
data/lib/stamina/abbadingo/random_dfa.rb +0 -48
data/lib/stamina/abbadingo/random_sample.rb +0 -146
data/lib/stamina/adl.rb +0 -298
data/lib/stamina/automaton.rb +0 -1263
data/lib/stamina/automaton/complete.rb +0 -36
data/lib/stamina/automaton/equivalence.rb +0 -55
data/lib/stamina/automaton/metrics.rb +0 -78
data/lib/stamina/automaton/minimize.rb +0 -25
data/lib/stamina/automaton/minimize/hopcroft.rb +0 -116
data/lib/stamina/automaton/minimize/pitchies.rb +0 -64
data/lib/stamina/automaton/strip.rb +0 -16
data/lib/stamina/automaton/walking.rb +0 -363
data/lib/stamina/classifier.rb +0 -52
data/lib/stamina/command.rb +0 -45
data/lib/stamina/command/abbadingo_dfa.rb +0 -81
data/lib/stamina/command/abbadingo_samples.rb +0 -40
data/lib/stamina/command/adl2dot.rb +0 -71
data/lib/stamina/command/classify.rb +0 -48
data/lib/stamina/command/help.rb +0 -27
data/lib/stamina/command/infer.rb +0 -141
data/lib/stamina/command/metrics.rb +0 -51
data/lib/stamina/command/robustness.rb +0 -22
data/lib/stamina/command/score.rb +0 -35
data/lib/stamina/errors.rb +0 -23
data/lib/stamina/ext/math.rb +0 -20
data/lib/stamina/induction/blue_fringe.rb +0 -265
data/lib/stamina/induction/commons.rb +0 -156
data/lib/stamina/induction/rpni.rb +0 -186
data/lib/stamina/induction/union_find.rb +0 -377
data/lib/stamina/input_string.rb +0 -123
data/lib/stamina/loader.rb +0 -1
data/lib/stamina/markable.rb +0 -42
data/lib/stamina/sample.rb +0 -267
data/lib/stamina/scoring.rb +0 -213
data/lib/stamina/utils.rb +0 -1
data/lib/stamina/utils/decorate.rb +0 -81
data/lib/stamina/version.rb +0 -14
data/stamina.gemspec +0 -191
data/stamina.noespec +0 -32
data/tasks/debug_mail.rake +0 -78
data/tasks/debug_mail.txt +0 -13
data/tasks/gem.rake +0 -68
data/tasks/spec_test.rake +0 -79
data/tasks/unit_test.rake +0 -77
data/tasks/yard.rake +0 -51
data/test/stamina/abbadingo/random_dfa_test.rb +0 -16
data/test/stamina/abbadingo/random_sample_test.rb +0 -78
data/test/stamina/adl_test.rb +0 -516
data/test/stamina/automaton/classifier_test.rb +0 -259
data/test/stamina/automaton/complete_test.rb +0 -58
data/test/stamina/automaton/equivalence_test.rb +0 -120
data/test/stamina/automaton/metrics_test.rb +0 -36
data/test/stamina/automaton/minimize/hopcroft_test.rb +0 -15
data/test/stamina/automaton/minimize/minimize_test.rb +0 -55
data/test/stamina/automaton/minimize/pitchies_test.rb +0 -15
data/test/stamina/automaton/minimize/rice_edu_10.adl +0 -16
data/test/stamina/automaton/minimize/rice_edu_10.min.adl +0 -13
data/test/stamina/automaton/minimize/rice_edu_13.adl +0 -13
data/test/stamina/automaton/minimize/rice_edu_13.min.adl +0 -7
data/test/stamina/automaton/minimize/should_strip_1.adl +0 -8
data/test/stamina/automaton/minimize/should_strip_1.min.adl +0 -6
data/test/stamina/automaton/minimize/unknown_1.adl +0 -16
data/test/stamina/automaton/minimize/unknown_1.min.adl +0 -12
data/test/stamina/automaton/strip_test.rb +0 -36
data/test/stamina/automaton/to_dot_test.rb +0 -64
data/test/stamina/automaton/walking/dfa_delta_test.rb +0 -39
data/test/stamina/automaton/walking_test.rb +0 -206
data/test/stamina/automaton_additional_test.rb +0 -190
data/test/stamina/automaton_test.rb +0 -1104
data/test/stamina/exit.rb +0 -3
data/test/stamina/induction/blue_fringe_test.rb +0 -83
data/test/stamina/induction/induction_test.rb +0 -70
data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +0 -19
data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +0 -64
data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +0 -9
data/test/stamina/induction/redblue_universal_expected.adl +0 -4
data/test/stamina/induction/redblue_universal_sample.adl +0 -5
data/test/stamina/induction/rpni_inria_expected.adl +0 -7
data/test/stamina/induction/rpni_inria_sample.adl +0 -9
data/test/stamina/induction/rpni_test.rb +0 -129
data/test/stamina/induction/rpni_test_pta.dot +0 -22
data/test/stamina/induction/rpni_universal_expected.adl +0 -4
data/test/stamina/induction/rpni_universal_sample.adl +0 -4
data/test/stamina/induction/union_find_test.rb +0 -124
data/test/stamina/input_string_test.rb +0 -323
data/test/stamina/markable_test.rb +0 -70
data/test/stamina/randdfa.adl +0 -66
data/test/stamina/sample.adl +0 -4
data/test/stamina/sample_classify_test.rb +0 -149
data/test/stamina/sample_test.rb +0 -290
data/test/stamina/scoring_test.rb +0 -63
data/test/stamina/small_dfa.dot +0 -16
data/test/stamina/small_dfa.gif +0 -0
data/test/stamina/small_nfa.dot +0 -18
data/test/stamina/small_nfa.gif +0 -0
data/test/stamina/stamina_test.rb +0 -80
data/test/stamina/utils/decorate_test.rb +0 -65
data/test/test_all.rb +0 -7

data/lib/stamina/abbadingo.rb DELETED Viewed

	@@ -1,2 +0,0 @@
1	- require 'stamina/abbadingo/random_dfa'
2	- require 'stamina/abbadingo/random_sample'

data/lib/stamina/abbadingo/random_dfa.rb DELETED Viewed

@@ -1,48 +0,0 @@
-module Stamina
-  module Abbadingo
-    #
-    # Generates a random DFA using the Abbadingo protocol.
-    #
-    class RandomDFA
-      # Number of wished states
-      attr_reader :state_count
-      # Accepting ratio
-      attr_reader :accepting_ratio
-      # Creates an algorithm instance with default options
-      def initialize(state_count = 64, accepting_ratio = 0.5)
-        @state_count = state_count
-        @accepting_ratio = accepting_ratio
-      end
-      def execute
-        dfa = Automaton.new
-        # Generate 5/4*state_count states
-        (state_count.to_f * 5.0 / 4.0).to_i.times do
-          dfa.add_state(:initial   => false,
-                        :accepting => (Kernel.rand <= accepting_ratio),
-                        :error     => false)
-        end
-        # Generate all edges
-        dfa.each_state do |source|
-          ["0", "1"].each do |symbol|
-            target = dfa.ith_state(Kernel.rand(dfa.state_count))
-            dfa.connect(source, target, symbol)
-          end
-        end
-        # Choose an initial state
-        dfa.ith_state(Kernel.rand(dfa.state_count)).initial!
-        # Minimize the automaton and return it
-        Stamina::Automaton::Minimize::Pitchies.execute(dfa)
-      end
-    end # class RandomDFA
-  end # module Abbadingo
-end # module Stamina

data/lib/stamina/abbadingo/random_sample.rb DELETED Viewed

@@ -1,146 +0,0 @@
-module Stamina
-  module Abbadingo
-    #
-    # Generates a random Sample using the Abbadingo protocol.
-    #
-    class RandomSample
-      #
-      # Implements an enumerator for binary strings whose length lies between 0
-      # and max_length (passed at construction).
-      #
-      # The enumerator guarantees that strings are sampled with a uniform distribution
-      # among them. As the number of strings of a given length grows exponentially
-      # with that length, this means that you've got about a 50% chance of having a
-      # string of length max_length, 25% of max_length - 1, 12.5% of max_length - 2
-      # and so on.
-      #
-      # How to use it?
-      #
-      #   # create for strings between 0 and 10 symbols, inclusive
-      #   enum = Stamina::Abbadingo::RandomSample::StringEnumerator.new(10)
-      #
-      #   # this is how to generate strings while a predicate is true
-      #   enum.each do |s|
-      #     # s is an array of binary integer symbols (0 or 1)
-      #     # true for continuing, false otherwise
-      #     return (true || false)
-      #   end
-      #
-      #   # this is how to generate a fixed number of strings
-      #   (1..1000).collect{ enum.one }
-      #
-      # How does it work? Well, the distribution of strings is as follows:
-      #
-      #    length     [n]b_strings        [c]umul       log2(n)         log2(c)    log2(c).floor
-      #                   (2**n)         2**(n+1)-1
-      #      0               1               1       0.0000000000       0.000000        0
-      #      1               2               3       1.0000000000       1.584963        1
-      #      2               4               7       2.0000000000       2.807355        2
-      #      3               8              15       3.0000000000       3.906891        3
-      #      4              16              31       4.0000000000       4.954196        4
-      #      5              32              63       5.0000000000       5.977280        5
-      #
-      # where _cumul_ is the total number of strings of up to _length_ symbols.
-      #
-      # Therefore, the idea is to give each string an identifier, say _x_,
-      # between 1 and 2**(max_length+1)-1 (see max).
-      #   * The length of the _x_th string is log2(x).floor (see length_for)
-      #   * The string itself is the binary decomposition of x, up to length_for(x)
-      #     symbols (see string_for)
-      #
-      # As those identifiers naturally respect the exponential distribution, sampling
-      # the strings is the same as taking string_for(x) for a random x up to _max_.
-      #
-      class StringEnumerator
-        include Enumerable
-        # Maximal length of a string
-        attr_reader :max_length
-        def initialize(max_length = 16)
-          @max_length = max_length
-        end
-        #
-        # Returns the length of the string whose identifier is _x_ (> 0)
-        #
-        def length_for(x)
-          Math.log2(x).floor
-        end
-        #
-        # Returns the binary string whose identifier is _x_ (> 0)
-        #
-        def string_for(x)
-          length = length_for(x)
-          (0..length-1).collect{|i| ((x >> i) % 2).to_s}
-        end
-        #
-        # Returns the maximum identifier, which is also the number of strings
-        # up to max_length symbols
-        #
-        def max
-          @max ||= 2 ** (max_length+1) - 1
-        end
-        #
-        # Generates a string at random
-        #
-        def one
-          string_for(1+Kernel.rand(max))
-        end
-        #
-        # Yields the block with a random string, until the block returns false
-        # or nil.
-        #
-        def each
-          begin
-            cont = yield(one)
-          end while cont
-        end
-      end # class StringEnumerator
-      #
-      # Generates a pair of Sample instances (training set first, test set second)
-      # from strings randomly sampled with a uniform distribution over all strings
-      # of up to _max_length_ symbols.
-      #
-      def self.execute(classifier, max_length = classifier.depth + 3)
-        enum = StringEnumerator.new(max_length)
-        # We generate 1800 strings for the test set plus n^2/2 strings for
-        # the training set. If there are not enough strings available, we generate
-        # the maximum we can
-        seen = {}
-        nb = Math.min(1800 + (classifier.state_count**2), enum.max)
-        # Let's go now
-        enum.each do |s|
-          seen[s] = true
-          seen.size < nb
-        end
-        # Make them
-        strings = seen.keys.collect{|s| InputString.new(s, classifier.accepts?(s))}
-        pos, neg = strings.partition{|s| s.positive?}
-        # Split them, 1800 in test and the rest in training set
-        if (pos.size > 900) && (neg.size > 900)
-          pos_test, pos_training = pos[0...900], pos[900..-1]
-          neg_test, neg_training = neg[0...900], neg[900..-1]
-        else
-          pos_test, pos_training = pos.partition{|s| Kernel.rand < 0.5}
-          neg_test, neg_training = neg.partition{|s| Kernel.rand < 0.5}
-        end
-        flusher = lambda{|x,y| Kernel.rand < 0.5 ? 1 : -1}
-        training = (pos_training + neg_training).sort &flusher
-        test = (pos_test + neg_test).sort &flusher
-        [Sample.new(training), Sample.new(test)]
-      end
-    end # class RandomSample
-  end # module Abbadingo
-end # module Stamina

data/lib/stamina/adl.rb DELETED Viewed

@@ -1,298 +0,0 @@
-module Stamina
-  #
-  # Automaton Description Language module. This module provides parsing and
-  # printing methods for automata and samples. Documentation of the file format
-  # used for an automaton is given in parse_automaton; file format for samples is
-  # documented in parse_sample.
-  #
-  # Methods of this module are not intended to be included by a class but invoked
-  # on the module instead:
-  #
-  #   begin
-  #     dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
-  #   rescue ADL::ParseError => ex
-  #     puts "Oops, the ADL automaton file seems corrupted..."
-  #   end
-  #
-  # == Detailed API
-  module ADL
-    #################################################################################
-    # Automaton Section                                                             #
-    #################################################################################
-    #
-    # Parses a given automaton description and returns an Automaton instance.
-    #
-    # Raises:
-    # - ArgumentError unless _descr_ is an IO object or a String.
-    # - ADL::ParseError if the ADL automaton format is not respected.
-    #
-    # ADL provides a really simple grammar to describe automata. Here is a succinct
-    # example (full documentation of the ADL automaton grammar can be found in
-    # the self-documenting example/adl/automaton.adl file).
-    #
-    #    # Some header comments: tool which has generated this automaton,
-    #    # maybe a date or other tool options ...
-    #    # here: 'this automaton accepts the a(ba)* regular language'
-    #    2 2
-    #    0 true false
-    #    1 false true
-    #    0 1 a
-    #    1 0 b
-    #
-    def self.parse_automaton(descr)
-      automaton = nil
-      ADL::to_io(descr) do |io|
-        state_count, edge_count = nil, nil
-        state_read, edge_read = 0, 0
-        states = {}
-        mode = :header
-        automaton = Automaton.new do |fa|
-          # parse each description line
-          line_number = 1
-          io.each_line do |l|
-            index = l.index('#')
-            l = l[0,index] if index
-            l = l.strip
-            next if l.empty? or l[0,1]=='#'
-            case mode
-            when :header
-              # looking for |state_count edge_count|
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: 'state_count edge_count' expected, "\
-                    "'#{l}' found.") unless /^(\d+)\s+(\d+)$/ =~ l
-              state_count, edge_count = $1.to_i, $2.to_i
-              mode = :states
-            when :states
-              # looking for |number initial accepting|
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: state definition expected, "\
-                    "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
-              id, initial, accepting, error = $1, $2, $3, $5
-              initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
-              state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
-              state[:name]=id.to_s
-              states[id] = state
-              state_read += 1
-              mode = (edge_count==0 ? :end : :edges) if state_read==state_count
-            when :edges
-              # looking for |source target symbol|
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: edge definition expected, "\
-                    "'#{l}' found.") unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
-              source, target, symbol = $1, $2, $3
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: no such state #{source}") \
-                    unless states[source]
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: no such state #{target}") \
-                    unless states[target]
-              fa.connect(states[source], states[target], {:symbol => symbol})
-              edge_read += 1
-              mode = :end if edge_read==edge_count
-            when :end
-              raise(ADL::ParseError,
-                    "Parse error line #{line_number}: trailing data found '#{l}")
-            end # case mode
-            line_number += 1
-          end
-          raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
-                               "#{state_read} found.") if state_count != state_read
-          raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
-                               "#{edge_read} found.") if edge_count != edge_read
-        end # Automaton.new
-      end
-      return automaton
-    end # def self.parse
-    #
-    # Parses an automaton file _f_.
-    #
-    # Shortcut for:
-    #   File.open(f, 'r') do |io|
-    #     Stamina::ADL.parse_automaton(io)
-    #   end
-    #
-    def self.parse_automaton_file(f)
-      automaton = nil
-      File.open(f) do |file|
-        automaton = ADL::parse_automaton(file)
-      end
-      automaton
-    end
-    #
-    # Prints an automaton to a buffer (responding to <code>:&lt;&lt;</code>) in ADL
-    # format. Returns the buffer itself.
-    #
-    def self.print_automaton(fa, buffer="")
-      buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
-      fa.states.each do |s|
-        buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
-      end
-      fa.edges.each do |e|
-        buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
-      end
-      buffer
-    end
-    #
-    # Prints an automaton to a file whose path is provided.
-    #
-    # Shortcut for:
-    #   File.open(file, 'w') do |io|
-    #     print_automaton(fa, io)
-    #  end
-    #
-    def self.print_automaton_to_file(fa, file)
-      File.open(file, 'w') do |io|
-        print_automaton(fa, io)
-      end
-    end
-    #################################################################################
-    # String and Sample Section                                                     #
-    #################################################################################
-    #
-    # Parses an input string _str_ and returns an InputString instance. Format of
-    # input strings is documented in parse_sample. _str_ is required to be a ruby
-    # String.
-    #
-    # Raises:
-    # - ADL::ParseError if the ADL string format is not respected.
-    #
-    def self.parse_string(str)
-      symbols = str.split(' ')
-      case symbols[0]
-        when '+'
-          symbols.shift
-          InputString.new symbols, true, false
-        when '-'
-          symbols.shift
-          InputString.new symbols, false, false
-        when '?'
-          symbols.shift
-          InputString.new symbols, nil, false
-        else
-          raise ADL::ParseError, "Invalid string format #{str}", caller
-      end
-    end
-    #
-    # Parses the sample provided by _descr_. When a block is provided, yields it with
-    # InputString instances and ignores the sample argument. Otherwise, fills the sample
-    # (any object responding to <code><<</code>) with strings, creating a fresh new
-    # one (as a Sample instance) if sample is nil.
-    #
-    # ADL provides a really simple grammar to describe samples (here is a succinct
-    # example, the full documentation of the sample grammar can be found in the
-    # self-documenting example/adl/sample.adl file):
-    #
-    #    #
-    #    # Some header comments: tool which has generated this sample,
-    #    # maybe a date or other tool options ...
-    #    # here: 'this sample is characteristic for the a(ba)* regular language'
-    #    #
-    #    # Positive, Negative, Unlabeled strings begin with +, -, ?, respectively
-    #    # Empty lines and lines beginning with # are simply ignored.
-    #    #
-    #    -
-    #    + a
-    #    - a b
-    #    + a b a
-    #
-    # Raises:
-    # - ArgumentError unless _descr_ argument is an IO object or a String.
-    # - ADL::ParseError if the ADL sample format is not respected.
-    # - InconsistencyError if the sample is not consistent (see Sample)
-    #
-    def self.parse_sample(descr, sample=nil)
-      sample = Sample.new if (sample.nil? and not block_given?)
-      ADL::to_io(descr) do |io|
-        io.each_line do |l|
-          l = l.strip
-          next if l.empty? or l[0,1]=='#'
-          if sample.nil? and block_given?
-            yield parse_string(l)
-          else
-            sample << parse_string(l)
-          end
-        end
-      end
-      sample
-    end
-    #
-    # Parses a sample file _f_.
-    #
-    # Shortcut for:
-    #   File.open(f) do |file|
-    #      sample = ADL::parse_sample(file, sample)
-    #   end
-    #
-    def self.parse_sample_file(f, sample=nil)
-      File.open(f) do |file|
-        sample = ADL::parse_sample(file, sample)
-      end
-      sample
-    end
-    #
-    # Prints a sample in ADL format on a buffer. Sample argument is expected to be
-    # an object responding to each, yielding InputString instances. Buffer is expected
-    # to be an object responding to <code><<</code>.
-    #
-    def self.print_sample(sample, buffer="")
-      sample.each do |str|
-        buffer << str.to_s << "\n"
-      end
-    end
-    #
-    # Prints a sample in a file.
-    #
-    # Shortcut for:
-    #   File.open(file, 'w') do |io|
-    #     print_sample(sample, io)
-    #   end
-    #
-    def self.print_sample_in_file(sample, file)
-      File.open(file, 'w') do |f|
-        print_sample(sample, f)
-      end
-    end
-    ### private section ##########################################################
-    private
-    #
-    # Converts a parsable argument to an IO object or raises an ArgumentError.
-    #
-    def self.to_io(descr)
-      case descr
-      when IO
-        yield descr
-      when String
-        yield StringIO.new(descr)
-      else
-        raise ArgumentError, "IO instance expected, #{descr.class} received", caller
-      end
-    end
-  end # module ADL
-end # module Stamina