stamina 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
@@ -0,0 +1,40 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Generates samples following Abbadingo's protocol
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} target.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class AbbadingoSamples < Quickl::Command(__FILE__, __LINE__)
13
+
14
+ # Install options
15
+ options do |opt|
16
+
17
+ end # options
18
+
19
+ # Command execution: parses the target automaton named by the single argument and writes a training and a test sample next to it
20
+ def execute(args)
21
+ raise Quickl::Help unless args.size == 1 # exactly one argument (the target .adl file), otherwise show help
22
+
23
+ # Loads the target automaton; basename/dirname are kept to name the output files
24
+ target_file = args.first
25
+ basename = File.basename(target_file, '.adl')
26
+ dirname = File.dirname(target_file)
27
+ target = Stamina::ADL::parse_automaton_file(target_file)
28
+
29
+ require 'stamina/abbadingo' # loaded here, only when the command actually runs
30
+ training, test = Stamina::Abbadingo::RandomSample.execute(target)
31
+
32
+ # Flush results next to the target automaton file, as <basename>-training.adl and <basename>-test.adl
33
+ Stamina::ADL::print_sample_in_file(training, File.join(dirname, "#{basename}-training.adl"))
34
+ Stamina::ADL::print_sample_in_file(test, File.join(dirname, "#{basename}-test.adl"))
35
+ end
36
+
37
+ end # class AbbadingoSamples
38
+ end # class Command
39
+ end # module Stamina
40
+
@@ -0,0 +1,71 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Prints an automaton expressed in ADL in dot (or gif) format
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Adl2dot < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ attr_reader :gif_output
16
+
17
+ # Install options
18
+ options do |opt|
19
+
20
+ @output_file = nil
21
+ opt.on("-o", "--output=OUTPUT",
22
+ "Flush result output file") do |value|
23
+ @output_file = assert_writable_file(value)
24
+ end
25
+
26
+ opt.on("-g", "--gif",
27
+ "Generates a gif file instead of a dot one") do
28
+ @gif_output = true
29
+ end
30
+
31
+ end # options
32
+
33
+ def output_file(infile) # Output path: the -o value if given, else the input basename (or 'stdin' when infile is nil) with a .gif or .dot extension
34
+ @output_file || "#{File.basename(infile || 'stdin.adl', '.adl')}.#{gif_output ? 'gif' : 'dot'}"
35
+ end
36
+
37
+ # Command execution
38
+ def execute(args)
39
+ raise Quickl::Help unless args.size <= 1
40
+
41
+ # Loads the target automaton
42
+ input = if args.size == 1
43
+ File.read assert_readable_file(args.first)
44
+ else
45
+ $stdin.readlines.join("\n")
46
+ end
47
+ automaton = Stamina::ADL::parse_automaton(input)
48
+
49
+ # create a file for the dot output
50
+ if gif_output
51
+ require 'tempfile'
52
+ dotfile = Tempfile.new("stamina").path
53
+ else
54
+ dotfile = output_file(args.first)
55
+ end
56
+
57
+ # Flush automaton inside it
58
+ File.open(dotfile, 'w') do |f|
59
+ f << automaton.to_dot
60
+ end
61
+
62
+ # if gif output, use dot to convert it
63
+ if gif_output
64
+ `dot -Tgif -o #{output_file(args.first)} #{dotfile}`
65
+ end
66
+ end
67
+
68
+ end # class Adl2dot
69
+ end # class Command
70
+ end # module Stamina
71
+
@@ -0,0 +1,48 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Classifies a sample with an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Classify < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Where to flush the output
16
+ attr_accessor :output_file
17
+
18
+ # Install options
19
+ options do |opt|
20
+
21
+ @output_file = nil
22
+ opt.on("-o", "--output=OUTPUT",
23
+ "Flush classification signature in output file") do |value|
24
+ assert_writable_file(value)
25
+ @output_file = value
26
+ end
27
+
28
+ end # options
29
+
30
+ # Command execution: parses the sample (first argument) and the automaton (second argument), then writes automaton.signature(sample) to the -o file or to stdout
31
+ def execute(args)
32
+ raise Quickl::Help unless args.size == 2 # exactly two arguments, otherwise show help
33
+ sample = Stamina::ADL::parse_sample_file assert_readable_file(args.first)
34
+ automaton = Stamina::ADL::parse_automaton_file assert_readable_file(args.last)
35
+
36
+ if of = output_file # assignment is intended: true only when an output file was set through -o
37
+ File.open(of, 'w'){|io|
38
+ io << automaton.signature(sample)
39
+ }
40
+ else
41
+ $stdout << automaton.signature(sample)
42
+ end
43
+ end
44
+
45
+ end # class Classify
46
+ end # class Command
47
+ end # module Stamina
48
+
@@ -0,0 +1,27 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Show help about a specific command
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} COMMAND
8
+ #
9
+ class Help < Quickl::Command(__FILE__, __LINE__)
10
+
11
+ # Let NoSuchCommandError be passed to higher stage
12
+ no_react_to Quickl::NoSuchCommand
13
+
14
+ # Command execution: prints the help of the parent command, or of the sub-command named by the single argument
15
+ def execute(args)
16
+ if args.size != 1 # no argument or too many: fall back to the parent command's help
17
+ puts super_command.help
18
+ else
19
+ cmd = has_command!(args.first, super_command) # presumably raises Quickl::NoSuchCommand for an unknown name (see no_react_to in this class) -- confirm against Quickl
20
+ puts cmd.help
21
+ end
22
+ end
23
+
24
+ end # class Help
25
+ end # class Command
26
+ end # module Stamina
27
+
@@ -0,0 +1,141 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Grammar inference, induces a DFA from a training sample using a
5
+ # chosen algorithm.
6
+ #
7
+ # SYNOPSIS
8
+ # #{program_name} #{command_name} sample.adl
9
+ #
10
+ # OPTIONS
11
+ # #{summarized_options}
12
+ #
13
+ class Infer < Quickl::Command(__FILE__, __LINE__)
14
+ include Robustness
15
+
16
+ attr_accessor :algorithm
17
+ attr_accessor :take
18
+ attr_accessor :score
19
+ attr_accessor :verbose
20
+ attr_accessor :drop
21
+ attr_accessor :output_file
22
+
23
+ # Install options
24
+ options do |opt|
25
+
26
+ @algorithm = :rpni
27
+ opt.on("--algorithm=X", "Sets the induction algorithm to use (rpni, bluefringe)") do |x|
28
+ @algorithm = x.to_sym
29
+ end
30
+
31
+ @take = 1.0
32
+ opt.on("--take=X", Float, "Take only X% of available strings") do |x|
33
+ @take = x.to_f
34
+ unless @take > 0.0 and @take <= 1.0
35
+ raise Quickl::InvalidOption, "Invalid --take option: #{@take}"
36
+ end
37
+ end
38
+
39
+ @score = nil
40
+ opt.on("--score=test.adl", "Add scoring information to metadata, using test.adl file") do |x|
41
+ @score = assert_readable_file(x)
42
+ end
43
+
44
+ @verbose = true
45
+ opt.on("-v", "--[no-]verbose", "Verbose mode") do |x|
46
+ @verbose = x
47
+ end
48
+
49
+ @drop = false
50
+ opt.on("-d", "--drop", "Drop result") do |x|
51
+ @drop = x
52
+ end
53
+
54
+ @output_file = nil
55
+ opt.on("-o", "--output=OUTPUT",
56
+ "Flush induced DFA in output file") do |value|
57
+ @output_file = assert_writable_file(value)
58
+ end
59
+
60
+ end # options
61
+
62
+ def launch_induction(sample)
63
+ require 'benchmark'
64
+
65
+ algo_clazz = case algorithm
66
+ when :rpni
67
+ Stamina::Induction::RPNI
68
+ when :bluefringe
69
+ Stamina::Induction::BlueFringe
70
+ else
71
+ raise Quickl::InvalidOption, "Unknown induction algorithm: #{algo}"
72
+ end
73
+
74
+ dfa, tms = nil, nil
75
+ tms = Benchmark.measure do
76
+ dfa = algo_clazz.execute(sample, {:verbose => verbose})
77
+ end
78
+ [dfa, tms]
79
+ end
80
+
81
+ def load_sample(file) # Parses the ADL sample in file; when --take is not 1.0, returns a random sub-sample instead
82
+ sample = Stamina::ADL.parse_sample_file(file)
83
+ if @take != 1.0
84
+ sampled = Stamina::Sample.new
85
+ sample.each_positive{|s| sampled << s if Kernel.rand < @take} # each string is kept independently with probability @take
86
+ sample.each_negative{|s| sampled << s if Kernel.rand < @take}
87
+ sample = sampled
88
+ end
89
+ sample
90
+ end
91
+
92
+ # Command execution: loads the sample named by the single argument, induces a DFA from it, flushes the DFA unless --drop, then prints a hash of run metadata
93
+ def execute(args)
94
+ raise Quickl::Help unless args.size == 1
95
+
96
+ # Parses the sample (sub-sampled by load_sample when --take is set)
97
+ $stderr << "Parsing sample...\n" if verbose
98
+ sample = load_sample(assert_readable_file(args.first))
99
+
100
+ # Induce the DFA; tms holds the Benchmark timings of the induction
101
+ dfa, tms = launch_induction(sample)
102
+
103
+ # Flush result to the -o file if given, to stdout otherwise
104
+ unless drop
105
+ if output_file
106
+ File.open(output_file, 'w') do |file|
107
+ Stamina::ADL.print_automaton(dfa, file)
108
+ end
109
+ else
110
+ Stamina::ADL.print_automaton(dfa, $stdout)
111
+ end
112
+ end
113
+
114
+ # build meta information about the run (algorithm, sample counts, timings)
115
+ meta = {:algorithm => algorithm,
116
+ :sample => File.basename(args.first),
117
+ :take => take,
118
+ :sample_size => sample.size,
119
+ :positive_count => sample.positive_count,
120
+ :negative_count => sample.negative_count,
121
+ :real_time => tms.real,
122
+ :total_time => tms.total,
123
+ :user_time => tms.utime + tms.cutime,
124
+ :system_time => tms.stime + tms.cstime}
125
+
126
+ if score # --score given: classify the test sample with the induced DFA and merge the scoring into meta
127
+ test = Stamina::ADL::parse_sample_file(score)
128
+ classified_as = dfa.signature(test)
129
+ reference = test.signature
130
+ scoring = Scoring.scoring(classified_as, reference)
131
+ meta.merge!(scoring.to_h)
132
+ end
133
+
134
+ # Display information (on stdout, like the DFA when no -o file is given)
135
+ puts meta.inspect
136
+ end
137
+
138
+ end # class Infer
139
+ end # class Command
140
+ end # module Stamina
141
+
@@ -0,0 +1,51 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Prints metrics about an automaton or sample
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} [file.adl]
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Metrics < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Install options
16
+ options do |opt|
17
+
18
+ end # options
19
+
20
+ # Command execution
21
+ def execute(args)
22
+ raise Quickl::Help unless args.size <= 1
23
+
24
+ # Loads the target automaton
25
+ input = if args.size == 1
26
+ File.read assert_readable_file(args.first)
27
+ else
28
+ $stdin.readlines.join("\n")
29
+ end
30
+
31
+ # Flush metrics
32
+ begin
33
+ target = Stamina::ADL::parse_automaton(input)
34
+ puts "Alphabet size: #{target.alphabet_size}"
35
+ puts "State count: #{target.state_count}"
36
+ puts "Edge count: #{target.edge_count}"
37
+ puts "Degree (avg): #{target.avg_degree}"
38
+ puts "Accepting ratio: #{target.accepting_ratio}"
39
+ puts "Depth: #{target.depth}"
40
+ rescue ADL::ParseError
41
+ sample = Stamina::ADL::parse_sample(input)
42
+ puts "Size: #{sample.size}"
43
+ puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
44
+ puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
45
+ end
46
+ end
47
+
48
+ end # class Metrics
49
+ end # class Command
50
+ end # module Stamina
51
+
@@ -0,0 +1,22 @@
1
+ module Stamina
2
+ class Command
3
+ module Robustness
4
+
5
+ # Checks that a given file is readable or raises a Quickl::IOAccessError
6
+ def assert_readable_file(file)
7
+ raise Quickl::IOAccessError, "File #{file} does not exists" unless File.exists?(file)
8
+ raise Quickl::IOAccessError, "File #{file} cannot be read" unless File.readable?(file)
9
+ file
10
+ end
11
+
12
+ # Checks that a given file is writable or raises a Quickl::IOAccessError
13
+ def assert_writable_file(file)
14
+ raise Quickl::IOAccessError, "File #{file} cannot be written" \
15
+ unless not(File.exists?(file)) or File.writable?(file)
16
+ file
17
+ end
18
+
19
+ end # module Robustness
20
+ end # class Command
21
+ end # module Stamina
22
+
@@ -0,0 +1,35 @@
1
+ module Stamina
2
+ class Command
3
+ #
4
+ # Scores the labelling of a sample by an automaton
5
+ #
6
+ # SYNOPSIS
7
+ # #{program_name} #{command_name} sample.adl automaton.adl
8
+ #
9
+ # OPTIONS
10
+ # #{summarized_options}
11
+ #
12
+ class Score < Quickl::Command(__FILE__, __LINE__)
13
+ include Robustness
14
+
15
+ # Install options
16
+ options do |opt|
17
+
18
+ end # options
19
+
20
+ # Command execution: parses the sample (first argument) and the automaton (second argument), then prints the scoring of the automaton's labelling against the sample's own signature
21
+ def execute(args)
22
+ raise Quickl::Help unless args.size == 2 # exactly two arguments, otherwise show help
23
+ sample = Stamina::ADL::parse_sample_file assert_readable_file(args.first)
24
+ automaton = Stamina::ADL::parse_automaton_file assert_readable_file(args.last)
25
+
26
+ classified_as = automaton.signature(sample) # labelling produced by the automaton
27
+ reference = sample.signature # labelling carried by the sample itself
28
+ scoring = Scoring.scoring(classified_as, reference)
29
+ puts scoring.to_s
30
+ end
31
+
32
+ end # class Score
33
+ end # class Command
34
+ end # module Stamina
35
+