RubyGems - stamina - Versions diffs - 0.3.1 → 0.4.0 - Mend

stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

data/CHANGELOG.md +24 -0
data/Gemfile.lock +5 -1
data/bin/stamina +10 -0
data/lib/stamina.rb +2 -1
data/lib/stamina/abbadingo.rb +2 -0
data/lib/stamina/abbadingo/random_dfa.rb +48 -0
data/lib/stamina/abbadingo/random_sample.rb +146 -0
data/lib/stamina/adl.rb +6 -6
data/lib/stamina/automaton.rb +29 -4
data/lib/stamina/automaton/complete.rb +36 -0
data/lib/stamina/automaton/equivalence.rb +55 -0
data/lib/stamina/automaton/metrics.rb +8 -1
data/lib/stamina/automaton/minimize.rb +25 -0
data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
data/lib/stamina/automaton/strip.rb +16 -0
data/lib/stamina/automaton/walking.rb +46 -19
data/lib/stamina/command.rb +45 -0
data/lib/stamina/command/abbadingo_dfa.rb +81 -0
data/lib/stamina/command/abbadingo_samples.rb +40 -0
data/lib/stamina/command/adl2dot.rb +71 -0
data/lib/stamina/command/classify.rb +48 -0
data/lib/stamina/command/help.rb +27 -0
data/lib/stamina/command/infer.rb +141 -0
data/lib/stamina/command/metrics.rb +51 -0
data/lib/stamina/command/robustness.rb +22 -0
data/lib/stamina/command/score.rb +35 -0
data/lib/stamina/errors.rb +4 -1
data/lib/stamina/ext/math.rb +20 -0
data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
data/lib/stamina/induction/commons.rb +32 -46
data/lib/stamina/induction/rpni.rb +7 -9
data/lib/stamina/induction/union_find.rb +3 -3
data/lib/stamina/loader.rb +1 -0
data/lib/stamina/sample.rb +79 -2
data/lib/stamina/scoring.rb +37 -0
data/lib/stamina/version.rb +2 -2
data/stamina.gemspec +2 -1
data/stamina.noespec +9 -12
data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
data/test/stamina/abbadingo/random_sample_test.rb +78 -0
data/test/stamina/adl_test.rb +27 -2
data/test/stamina/automaton/complete_test.rb +58 -0
data/test/stamina/automaton/equivalence_test.rb +120 -0
data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
data/test/stamina/automaton/strip_test.rb +36 -0
data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
data/test/stamina/automaton_test.rb +13 -1
data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
data/test/stamina/sample_test.rb +75 -0
data/test/stamina/stamina_test.rb +13 -2
metadata +98 -23
data/bin/adl2dot +0 -12
data/bin/classify +0 -12
data/bin/redblue +0 -12
data/bin/rpni +0 -12
data/lib/stamina/command/adl2dot_command.rb +0 -73
data/lib/stamina/command/classify_command.rb +0 -57
data/lib/stamina/command/redblue_command.rb +0 -58
data/lib/stamina/command/rpni_command.rb +0 -58
data/lib/stamina/command/stamina_command.rb +0 -79

data/lib/stamina/command/abbadingo_samples.rb ADDED Viewed

@@ -0,0 +1,40 @@
+module Stamina
+  class Command
+    #
+    # Generates samples following Abbadingo's protocol
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} target.adl
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class AbbadingoSamples < Quickl::Command(__FILE__, __LINE__)
+      # Install options
+      options do |opt|
+      end # options
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size == 1
+        # Loads the target automaton
+        target_file = args.first
+        basename = File.basename(target_file, '.adl')
+        dirname = File.dirname(target_file)
+        target = Stamina::ADL::parse_automaton_file(target_file)
+        require 'stamina/abbadingo'
+        training, test = Stamina::Abbadingo::RandomSample.execute(target)
+        # Flush results aside the target automaton file
+        Stamina::ADL::print_sample_in_file(training, File.join(dirname, "#{basename}-training.adl"))
+        Stamina::ADL::print_sample_in_file(test,     File.join(dirname, "#{basename}-test.adl"))
+      end
+    end # class AbbadingoSamples
+  end # class Command
+end # module Stamina

data/lib/stamina/command/adl2dot.rb ADDED Viewed

@@ -0,0 +1,71 @@
+module Stamina
+  class Command
+    #
+    # Prints an automaton expressed in ADL in dot (or gif) format
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} automaton.adl
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class Adl2dot < Quickl::Command(__FILE__, __LINE__)
+      include Robustness
+      attr_reader :gif_output
+      # Install options
+      options do |opt|
+        @output_file = nil
+        opt.on("-o", "--output=OUTPUT",
+               "Flush result output file") do |value|
+          @output_file = assert_writable_file(value)
+        end
+        opt.on("-g", "--gif",
+               "Generates a gif file instead of a dot one") do
+          @gif_output = true
+        end
+      end # options
+      def output_file(infile)
+        @output_file || "#{File.basename(infile || 'stdin.adl', '.adl')}.#{gif_output ? 'gif' : 'dot'}"
+      end
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size <= 1
+        # Loads the target automaton
+        input = if args.size == 1
+          File.read assert_readable_file(args.first)
+        else
+          $stdin.readlines.join("\n")
+        end
+        automaton = Stamina::ADL::parse_automaton(input)
+        # create a file for the dot output
+        if gif_output
+          require 'tempfile'
+          dotfile = Tempfile.new("stamina").path
+        else
+          dotfile = output_file(args.first)
+        end
+        # Flush automaton inside it
+        File.open(dotfile, 'w') do |f|
+          f << automaton.to_dot
+        end
+        # if gif output, use dot to convert it
+        if gif_output
+          `dot -Tgif -o #{output_file(args.first)} #{dotfile}`
+        end
+      end
+    end # class Adl2dot
+  end # class Command
+end # module Stamina

data/lib/stamina/command/classify.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Stamina
+  class Command
+    #
+    # Classifies a sample thanks with an automaton
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} sample.adl automaton.adl
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class Classify < Quickl::Command(__FILE__, __LINE__)
+      include Robustness
+      # Where to flush the output
+      attr_accessor :output_file
+      # Install options
+      options do |opt|
+        @output_file = nil
+        opt.on("-o", "--output=OUTPUT",
+               "Flush classification signature in output file") do |value|
+          assert_writable_file(value)
+          @output_file = value
+        end
+      end # options
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size == 2
+        sample    = Stamina::ADL::parse_sample_file assert_readable_file(args.first)
+        automaton = Stamina::ADL::parse_automaton_file assert_readable_file(args.last)
+        if of = output_file
+          File.open(of, 'w'){|io|
+            io << automaton.signature(sample)
+          }
+        else
+          $stdout << automaton.signature(sample)
+        end
+      end
+    end # class Classify
+  end # class Command
+end # module Stamina

data/lib/stamina/command/help.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module Stamina
+  class Command
+    #
+    # Show help about a specific command
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} COMMAND
+    #
+    class Help < Quickl::Command(__FILE__, __LINE__)
+      # Let NoSuchCommandError be passed to higher stage
+      no_react_to Quickl::NoSuchCommand
+      # Command execution
+      def execute(args)
+        if args.size != 1
+          puts super_command.help
+        else
+          cmd = has_command!(args.first, super_command)
+          puts cmd.help
+        end
+      end
+    end # class Help
+  end # class Command
+end # module Stamina

data/lib/stamina/command/infer.rb ADDED Viewed

@@ -0,0 +1,141 @@
+module Stamina
+  class Command
+    #
+    # Grammar inference, induces a DFA from a training sample using an
+    # chosen algorithm.
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} sample.adl
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class Infer < Quickl::Command(__FILE__, __LINE__)
+      include Robustness
+      attr_accessor :algorithm
+      attr_accessor :take
+      attr_accessor :score
+      attr_accessor :verbose
+      attr_accessor :drop
+      attr_accessor :output_file
+      # Install options
+      options do |opt|
+        @algorithm = :rpni
+        opt.on("--algorithm=X", "Sets the induction algorithm to use (rpni, bluefringe)") do |x|
+          @algorithm = x.to_sym
+        end
+        @take = 1.0
+        opt.on("--take=X", Float, "Take only X% of available strings") do |x|
+          @take = x.to_f
+          unless @take > 0.0 and @take <= 1.0
+            raise Quickl::InvalidOption, "Invalid --take option: #{@take}"
+          end
+        end
+        @score = nil
+        opt.on("--score=test.adl", "Add scoring information to metadata, using test.adl file") do |x|
+          @score = assert_readable_file(x)
+        end
+        @verbose = true
+        opt.on("-v", "--[no-]verbose", "Verbose mode") do |x|
+          @verbose = x
+        end
+        @drop = false
+        opt.on("-d", "--drop", "Drop result") do |x|
+          @drop = x
+        end
+        @output_file = nil
+        opt.on("-o", "--output=OUTPUT",
+               "Flush induced DFA in output file") do |value|
+          @output_file = assert_writable_file(value)
+        end
+      end # options
+      def launch_induction(sample)
+        require 'benchmark'
+        algo_clazz = case algorithm
+          when :rpni
+            Stamina::Induction::RPNI
+          when :bluefringe
+            Stamina::Induction::BlueFringe
+          else
+            raise Quickl::InvalidOption, "Unknown induction algorithm: #{algo}"
+        end
+        dfa, tms = nil, nil
+        tms = Benchmark.measure do
+          dfa = algo_clazz.execute(sample, {:verbose => verbose})
+        end
+        [dfa, tms]
+      end
+      def load_sample(file)
+        sample = Stamina::ADL.parse_sample_file(file)
+        if @take != 1.0
+          sampled = Stamina::Sample.new
+          sample.each_positive{|s| sampled << s if Kernel.rand < @take}
+          sample.each_negative{|s| sampled << s if Kernel.rand < @take}
+          sample = sampled
+        end
+        sample
+      end
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size == 1
+        # Parses the sample
+        $stderr << "Parsing sample...\n" if verbose
+        sample = load_sample(assert_readable_file(args.first))
+        # Induce the DFA
+        dfa, tms = launch_induction(sample)
+        # Flush result
+        unless drop
+          if output_file
+            File.open(output_file, 'w') do |file|
+              Stamina::ADL.print_automaton(dfa, file)
+            end
+          else
+            Stamina::ADL.print_automaton(dfa, $stdout)
+          end
+        end
+        # build meta information
+        meta = {:algorithm   => algorithm,
+                :sample      => File.basename(args.first),
+                :take        => take,
+                :sample_size => sample.size,
+                :positive_count => sample.positive_count,
+                :negative_count => sample.negative_count,
+                :real_time   => tms.real,
+                :total_time  => tms.total,
+                :user_time   => tms.utime + tms.cutime,
+                :system_time => tms.stime + tms.cstime}
+        if score
+          test = Stamina::ADL::parse_sample_file(score)
+          classified_as = dfa.signature(test)
+          reference = test.signature
+          scoring = Scoring.scoring(classified_as, reference)
+          meta.merge!(scoring.to_h)
+        end
+        # Display information
+        puts meta.inspect
+      end
+    end # class Infer
+  end # class Command
+end # module Stamina

data/lib/stamina/command/metrics.rb ADDED Viewed

@@ -0,0 +1,51 @@
+module Stamina
+  class Command
+    #
+    # Prints metrics about an automaton or sample
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} [file.adl]
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class Metrics < Quickl::Command(__FILE__, __LINE__)
+      include Robustness
+      # Install options
+      options do |opt|
+      end # options
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size <= 1
+        # Loads the target automaton
+        input = if args.size == 1
+          File.read assert_readable_file(args.first)
+        else
+          $stdin.readlines.join("\n")
+        end
+        # Flush metrics
+        begin
+          target = Stamina::ADL::parse_automaton(input)
+          puts "Alphabet size:   #{target.alphabet_size}"
+          puts "State count:     #{target.state_count}"
+          puts "Edge count:      #{target.edge_count}"
+          puts "Degree (avg):    #{target.avg_degree}"
+          puts "Accepting ratio: #{target.accepting_ratio}"
+          puts "Depth:           #{target.depth}"
+        rescue ADL::ParseError
+          sample = Stamina::ADL::parse_sample(input)
+          puts "Size:     #{sample.size}"
+          puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
+          puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
+        end
+      end
+    end # class Metrics
+  end # class Command
+end # module Stamina

data/lib/stamina/command/robustness.rb ADDED Viewed

@@ -0,0 +1,22 @@
+module Stamina
+  class Command
+    module Robustness
+      # Checks that a given file is readable or raises a Quickl::IOAccessError
+      def assert_readable_file(file)
+        raise Quickl::IOAccessError, "File #{file} does not exists" unless File.exists?(file)
+        raise Quickl::IOAccessError, "File #{file} cannot be read"  unless File.readable?(file)
+        file
+      end
+      # Checks that a given file is writable or raises a Quickl::IOAccessError
+      def assert_writable_file(file)
+        raise Quickl::IOAccessError, "File #{file} cannot be written" \
+          unless not(File.exists?(file)) or File.writable?(file)
+        file
+      end
+    end # module Robustness
+  end # class Command
+end # module Stamina

data/lib/stamina/command/score.rb ADDED Viewed

@@ -0,0 +1,35 @@
+module Stamina
+  class Command
+    #
+    # Scores the labelling of a sample by an automaton
+    #
+    # SYNOPSIS
+    #   #{program_name} #{command_name} sample.adl automaton.adl
+    #
+    # OPTIONS
+    # #{summarized_options}
+    #
+    class Score < Quickl::Command(__FILE__, __LINE__)
+      include Robustness
+      # Install options
+      options do |opt|
+      end # options
+      # Command execution
+      def execute(args)
+        raise Quickl::Help unless args.size == 2
+        sample    = Stamina::ADL::parse_sample_file assert_readable_file(args.first)
+        automaton = Stamina::ADL::parse_automaton_file assert_readable_file(args.last)
+        classified_as = automaton.signature(sample)
+        reference = sample.signature
+        scoring = Scoring.scoring(classified_as, reference)
+        puts scoring.to_s
+      end
+    end # class Score
+  end # class Command
+end # module Stamina