stamina 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/CHANGELOG.md +24 -0
  2. data/Gemfile.lock +5 -1
  3. data/bin/stamina +10 -0
  4. data/lib/stamina.rb +2 -1
  5. data/lib/stamina/abbadingo.rb +2 -0
  6. data/lib/stamina/abbadingo/random_dfa.rb +48 -0
  7. data/lib/stamina/abbadingo/random_sample.rb +146 -0
  8. data/lib/stamina/adl.rb +6 -6
  9. data/lib/stamina/automaton.rb +29 -4
  10. data/lib/stamina/automaton/complete.rb +36 -0
  11. data/lib/stamina/automaton/equivalence.rb +55 -0
  12. data/lib/stamina/automaton/metrics.rb +8 -1
  13. data/lib/stamina/automaton/minimize.rb +25 -0
  14. data/lib/stamina/automaton/minimize/hopcroft.rb +116 -0
  15. data/lib/stamina/automaton/minimize/pitchies.rb +64 -0
  16. data/lib/stamina/automaton/strip.rb +16 -0
  17. data/lib/stamina/automaton/walking.rb +46 -19
  18. data/lib/stamina/command.rb +45 -0
  19. data/lib/stamina/command/abbadingo_dfa.rb +81 -0
  20. data/lib/stamina/command/abbadingo_samples.rb +40 -0
  21. data/lib/stamina/command/adl2dot.rb +71 -0
  22. data/lib/stamina/command/classify.rb +48 -0
  23. data/lib/stamina/command/help.rb +27 -0
  24. data/lib/stamina/command/infer.rb +141 -0
  25. data/lib/stamina/command/metrics.rb +51 -0
  26. data/lib/stamina/command/robustness.rb +22 -0
  27. data/lib/stamina/command/score.rb +35 -0
  28. data/lib/stamina/errors.rb +4 -1
  29. data/lib/stamina/ext/math.rb +20 -0
  30. data/lib/stamina/induction/{redblue.rb → blue_fringe.rb} +29 -28
  31. data/lib/stamina/induction/commons.rb +32 -46
  32. data/lib/stamina/induction/rpni.rb +7 -9
  33. data/lib/stamina/induction/union_find.rb +3 -3
  34. data/lib/stamina/loader.rb +1 -0
  35. data/lib/stamina/sample.rb +79 -2
  36. data/lib/stamina/scoring.rb +37 -0
  37. data/lib/stamina/version.rb +2 -2
  38. data/stamina.gemspec +2 -1
  39. data/stamina.noespec +9 -12
  40. data/test/stamina/abbadingo/random_dfa_test.rb +16 -0
  41. data/test/stamina/abbadingo/random_sample_test.rb +78 -0
  42. data/test/stamina/adl_test.rb +27 -2
  43. data/test/stamina/automaton/complete_test.rb +58 -0
  44. data/test/stamina/automaton/equivalence_test.rb +120 -0
  45. data/test/stamina/automaton/minimize/hopcroft_test.rb +15 -0
  46. data/test/stamina/automaton/minimize/minimize_test.rb +55 -0
  47. data/test/stamina/automaton/minimize/pitchies_test.rb +15 -0
  48. data/test/stamina/automaton/minimize/rice_edu_10.adl +16 -0
  49. data/test/stamina/automaton/minimize/rice_edu_10.min.adl +13 -0
  50. data/test/stamina/automaton/minimize/rice_edu_13.adl +13 -0
  51. data/test/stamina/automaton/minimize/rice_edu_13.min.adl +7 -0
  52. data/test/stamina/automaton/minimize/should_strip_1.adl +8 -0
  53. data/test/stamina/automaton/minimize/should_strip_1.min.adl +6 -0
  54. data/test/stamina/automaton/minimize/unknown_1.adl +16 -0
  55. data/test/stamina/automaton/minimize/unknown_1.min.adl +12 -0
  56. data/test/stamina/automaton/strip_test.rb +36 -0
  57. data/test/stamina/automaton/walking/dfa_delta_test.rb +39 -0
  58. data/test/stamina/automaton_test.rb +13 -1
  59. data/test/stamina/induction/{redblue_test.rb → blue_fringe_test.rb} +22 -22
  60. data/test/stamina/sample_test.rb +75 -0
  61. data/test/stamina/stamina_test.rb +13 -2
  62. metadata +98 -23
  63. data/bin/adl2dot +0 -12
  64. data/bin/classify +0 -12
  65. data/bin/redblue +0 -12
  66. data/bin/rpni +0 -12
  67. data/lib/stamina/command/adl2dot_command.rb +0 -73
  68. data/lib/stamina/command/classify_command.rb +0 -57
  69. data/lib/stamina/command/redblue_command.rb +0 -58
  70. data/lib/stamina/command/rpni_command.rb +0 -58
  71. data/lib/stamina/command/stamina_command.rb +0 -79
@@ -0,0 +1,40 @@
1
module Stamina
  class Command
    #
    # Generates samples following Abbadingo's protocol
    #
    # SYNOPSIS
    # #{program_name} #{command_name} target.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class AbbadingoSamples < Quickl::Command(__FILE__, __LINE__)

      # This command declares no option of its own.
      options do |opt|
      end

      # Runs the command.
      #
      # Expects exactly one argument, the path of an ADL automaton file
      # (Quickl::Help is raised otherwise). A training and a test sample are
      # drawn from that automaton and written in the same directory as the
      # target, under <basename>-training.adl and <basename>-test.adl.
      def execute(args)
        raise Quickl::Help unless args.size == 1

        adl_path = args.first
        stem     = File.basename(adl_path, '.adl')
        folder   = File.dirname(adl_path)
        target   = Stamina::ADL.parse_automaton_file(adl_path)

        # Loaded lazily: only needed once the command actually runs
        require 'stamina/abbadingo'
        training, test = Stamina::Abbadingo::RandomSample.execute(target)

        # Both samples land next to the target automaton file
        training_path = File.join(folder, "#{stem}-training.adl")
        test_path     = File.join(folder, "#{stem}-test.adl")
        Stamina::ADL.print_sample_in_file(training, training_path)
        Stamina::ADL.print_sample_in_file(test, test_path)
      end

    end # class AbbadingoSamples
  end # class Command
end # module Stamina
40
+
@@ -0,0 +1,71 @@
1
module Stamina
  class Command
    #
    # Prints an automaton expressed in ADL in dot (or gif) format
    #
    # SYNOPSIS
    # #{program_name} #{command_name} automaton.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Adl2dot < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # True when a gif file must be generated instead of a dot one
      attr_reader :gif_output

      # Install options
      options do |opt|

        @output_file = nil
        opt.on("-o", "--output=OUTPUT",
               "Flush result output file") do |value|
          @output_file = assert_writable_file(value)
        end

        opt.on("-g", "--gif",
               "Generates a gif file instead of a dot one") do
          @gif_output = true
        end

      end # options

      # Returns the path of the file to generate.
      #
      # The path given through --output wins. Otherwise the name is derived
      # from the basename of +infile+ ('stdin' when +infile+ is nil, i.e. when
      # the automaton was read from standard input), with a 'gif' or 'dot'
      # extension depending on the --gif option.
      def output_file(infile)
        @output_file || "#{File.basename(infile || 'stdin.adl', '.adl')}.#{gif_output ? 'gif' : 'dot'}"
      end

      # Command execution.
      #
      # Accepts at most one argument (Quickl::Help is raised otherwise): the
      # ADL file to convert. Without argument, the automaton is read from
      # standard input.
      def execute(args)
        raise Quickl::Help unless args.size <= 1

        # Loads the target automaton. $stdin.read is used rather than
        # readlines.join("\n"): readlines keeps line terminators, so joining
        # with "\n" would double every newline of the input.
        input = if args.size == 1
          File.read assert_readable_file(args.first)
        else
          $stdin.read
        end
        automaton = Stamina::ADL::parse_automaton(input)
        target = output_file(args.first)

        if gif_output
          render_gif(automaton, target)
        else
          File.open(target, 'w') do |f|
            f << automaton.to_dot
          end
        end
      end

      private

      # Writes the dot description of +automaton+ in a temporary file and asks
      # the external `dot` program to convert it to a gif stored in +target+.
      #
      # `dot` is invoked with an argument list instead of an interpolated
      # shell string, so that file names containing spaces or shell
      # metacharacters are passed verbatim. The temporary file is always
      # removed afterwards. A message is written on $stderr when `dot` cannot
      # be run or exits with a failure status.
      def render_gif(automaton, target)
        require 'tempfile'
        dotfile = Tempfile.new("stamina")
        begin
          dotfile << automaton.to_dot
          # Flush to disk before handing the file over to dot
          dotfile.close
          unless system('dot', '-Tgif', '-o', target, dotfile.path)
            $stderr << "dot failed to generate #{target}\n"
          end
        ensure
          # Close (if still open) and unlink the temporary file
          dotfile.close!
        end
      end

    end # class Adl2dot
  end # class Command
end # module Stamina
71
+
@@ -0,0 +1,48 @@
1
module Stamina
  class Command
    #
    # Classifies a sample thanks with an automaton
    #
    # SYNOPSIS
    # #{program_name} #{command_name} sample.adl automaton.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Classify < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Path of the file receiving the signature (nil means standard output)
      attr_accessor :output_file

      # Declares the -o/--output option
      options do |opt|
        @output_file = nil
        opt.on("-o", "--output=OUTPUT",
               "Flush classification signature in output file") do |path|
          assert_writable_file(path)
          @output_file = path
        end
      end

      # Runs the command.
      #
      # Expects two arguments, a sample file then an automaton file
      # (Quickl::Help is raised otherwise). The signature computed by the
      # automaton on the sample is appended to the output file when one was
      # requested, to $stdout otherwise.
      def execute(args)
        raise Quickl::Help unless args.size == 2

        sample_path    = assert_readable_file(args.first)
        sample         = Stamina::ADL.parse_sample_file(sample_path)
        automaton_path = assert_readable_file(args.last)
        automaton      = Stamina::ADL.parse_automaton_file(automaton_path)

        destination = output_file
        if destination
          File.open(destination, 'w') do |io|
            io << automaton.signature(sample)
          end
        else
          $stdout << automaton.signature(sample)
        end
      end

    end # class Classify
  end # class Command
end # module Stamina
48
+
@@ -0,0 +1,27 @@
1
module Stamina
  class Command
    #
    # Show help about a specific command
    #
    # SYNOPSIS
    # #{program_name} #{command_name} COMMAND
    #
    class Help < Quickl::Command(__FILE__, __LINE__)

      # Quickl::NoSuchCommand is not handled here; it is left to the caller
      no_react_to Quickl::NoSuchCommand

      # Runs the command.
      #
      # With exactly one argument, prints the help of the sub-command bearing
      # that name; in every other case, prints the help of the super command.
      def execute(args)
        subject = if args.size == 1
          has_command!(args.first, super_command)
        else
          super_command
        end
        puts subject.help
      end

    end # class Help
  end # class Command
end # module Stamina
27
+
@@ -0,0 +1,141 @@
1
module Stamina
  class Command
    #
    # Grammar inference, induces a DFA from a training sample using an
    # chosen algorithm.
    #
    # SYNOPSIS
    # #{program_name} #{command_name} sample.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Infer < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Induction algorithm to use, as a symbol (:rpni by default)
      attr_accessor :algorithm

      # Ratio of sample strings to keep, in ]0.0, 1.0] (1.0 by default)
      attr_accessor :take

      # Path of the test sample used for scoring, or nil for no scoring
      attr_accessor :score

      # Verbose mode (true by default)
      attr_accessor :verbose

      # When true, the induced DFA is not printed
      attr_accessor :drop

      # Path of the file receiving the induced DFA (nil means $stdout)
      attr_accessor :output_file

      # Install options
      options do |opt|

        @algorithm = :rpni
        opt.on("--algorithm=X", "Sets the induction algorithm to use (rpni, bluefringe)") do |x|
          @algorithm = x.to_sym
        end

        @take = 1.0
        opt.on("--take=X", Float, "Take only X% of available strings") do |x|
          @take = x.to_f
          unless @take > 0.0 and @take <= 1.0
            raise Quickl::InvalidOption, "Invalid --take option: #{@take}"
          end
        end

        @score = nil
        opt.on("--score=test.adl", "Add scoring information to metadata, using test.adl file") do |x|
          @score = assert_readable_file(x)
        end

        @verbose = true
        opt.on("-v", "--[no-]verbose", "Verbose mode") do |x|
          @verbose = x
        end

        @drop = false
        opt.on("-d", "--drop", "Drop result") do |x|
          @drop = x
        end

        @output_file = nil
        opt.on("-o", "--output=OUTPUT",
               "Flush induced DFA in output file") do |value|
          @output_file = assert_writable_file(value)
        end

      end # options

      # Runs the selected induction algorithm on +sample+.
      #
      # Returns a pair [dfa, tms] where +dfa+ is the result of the algorithm
      # and +tms+ the Benchmark measure of its execution. Raises
      # Quickl::InvalidOption when the selected algorithm is neither :rpni nor
      # :bluefringe.
      def launch_induction(sample)
        require 'benchmark'

        algo_clazz = case algorithm
          when :rpni
            Stamina::Induction::RPNI
          when :bluefringe
            Stamina::Induction::BlueFringe
          else
            # The message must use the `algorithm` accessor: there is no local
            # named `algo`, referencing it raised a NameError instead of the
            # intended InvalidOption.
            raise Quickl::InvalidOption, "Unknown induction algorithm: #{algorithm}"
        end

        dfa = nil
        tms = Benchmark.measure do
          dfa = algo_clazz.execute(sample, {:verbose => verbose})
        end
        [dfa, tms]
      end

      # Parses the sample found in +file+.
      #
      # When the take ratio is not 1.0, returns a new sample in which each
      # positive then each negative string of the parsed sample has been kept
      # with probability +take+.
      def load_sample(file)
        sample = Stamina::ADL.parse_sample_file(file)
        if @take != 1.0
          sampled = Stamina::Sample.new
          sample.each_positive{|s| sampled << s if Kernel.rand < @take}
          sample.each_negative{|s| sampled << s if Kernel.rand < @take}
          sample = sampled
        end
        sample
      end

      # Command execution.
      #
      # Expects exactly one argument, the training sample file (Quickl::Help
      # is raised otherwise). Induces a DFA from it, prints the DFA unless
      # --drop was given, then prints a hash of meta information (completed
      # with scoring information when --score was given).
      def execute(args)
        raise Quickl::Help unless args.size == 1

        # Parses the sample
        $stderr << "Parsing sample...\n" if verbose
        sample = load_sample(assert_readable_file(args.first))

        # Induce the DFA
        dfa, tms = launch_induction(sample)

        # Flush result
        unless drop
          if output_file
            File.open(output_file, 'w') do |file|
              Stamina::ADL.print_automaton(dfa, file)
            end
          else
            Stamina::ADL.print_automaton(dfa, $stdout)
          end
        end

        # build meta information
        meta = {:algorithm => algorithm,
                :sample => File.basename(args.first),
                :take => take,
                :sample_size => sample.size,
                :positive_count => sample.positive_count,
                :negative_count => sample.negative_count,
                :real_time => tms.real,
                :total_time => tms.total,
                :user_time => tms.utime + tms.cutime,
                :system_time => tms.stime + tms.cstime}

        # Compare the labels given by the induced DFA with the ones of the
        # test sample itself
        if score
          test = Stamina::ADL::parse_sample_file(score)
          classified_as = dfa.signature(test)
          reference = test.signature
          scoring = Scoring.scoring(classified_as, reference)
          meta.merge!(scoring.to_h)
        end

        # Display information
        puts meta.inspect
      end

    end # class Infer
  end # class Command
end # module Stamina
141
+
@@ -0,0 +1,51 @@
1
module Stamina
  class Command
    #
    # Prints metrics about an automaton or sample
    #
    # SYNOPSIS
    # #{program_name} #{command_name} [file.adl]
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Metrics < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # Install options
      options do |opt|

      end # options

      # Command execution.
      #
      # Accepts at most one argument (Quickl::Help is raised otherwise): the
      # ADL file to analyse. Without argument, the ADL source is read from
      # standard input. The input is first parsed as an automaton; when that
      # raises an ADL::ParseError it is parsed as a sample instead.
      def execute(args)
        raise Quickl::Help unless args.size <= 1

        # Loads the ADL source. $stdin.read is used rather than
        # readlines.join("\n"): readlines keeps line terminators, so joining
        # with "\n" would double every newline of the input.
        input = if args.size == 1
          File.read assert_readable_file(args.first)
        else
          $stdin.read
        end

        # Flush metrics
        begin
          target = Stamina::ADL::parse_automaton(input)
          puts "Alphabet size: #{target.alphabet_size}"
          puts "State count: #{target.state_count}"
          puts "Edge count: #{target.edge_count}"
          puts "Degree (avg): #{target.avg_degree}"
          puts "Accepting ratio: #{target.accepting_ratio}"
          puts "Depth: #{target.depth}"
        rescue ADL::ParseError
          # Not an automaton: fall back on sample metrics
          sample = Stamina::ADL::parse_sample(input)
          puts "Size: #{sample.size}"
          puts "Positive: #{sample.positive_count} (#{sample.positive_count.to_f / sample.size})"
          puts "Negative: #{sample.negative_count} (#{sample.negative_count.to_f / sample.size})"
        end
      end

    end # class Metrics
  end # class Command
end # module Stamina
51
+
@@ -0,0 +1,22 @@
1
module Stamina
  class Command
    # Argument checking helpers shared by the commands.
    #
    # File.exist? is used throughout: the File.exists? alias is deprecated
    # and no longer available in Ruby 3.2 and later.
    module Robustness

      # Checks that a given file is readable or raises a Quickl::IOAccessError
      #
      # Returns +file+ unchanged when it exists and can be read, so that the
      # call can be chained with the code consuming the path.
      def assert_readable_file(file)
        raise Quickl::IOAccessError, "File #{file} does not exists" unless File.exist?(file)
        raise Quickl::IOAccessError, "File #{file} cannot be read" unless File.readable?(file)
        file
      end

      # Checks that a given file is writable or raises a Quickl::IOAccessError
      #
      # A file that does not exist yet is accepted; an existing one must be
      # writable. Returns +file+ unchanged on success.
      def assert_writable_file(file)
        if File.exist?(file) && !File.writable?(file)
          raise Quickl::IOAccessError, "File #{file} cannot be written"
        end
        file
      end

    end # module Robustness
  end # class Command
end # module Stamina
22
+
@@ -0,0 +1,35 @@
1
module Stamina
  class Command
    #
    # Scores the labelling of a sample by an automaton
    #
    # SYNOPSIS
    # #{program_name} #{command_name} sample.adl automaton.adl
    #
    # OPTIONS
    # #{summarized_options}
    #
    class Score < Quickl::Command(__FILE__, __LINE__)
      include Robustness

      # This command declares no option of its own.
      options do |opt|
      end

      # Runs the command.
      #
      # Expects two arguments, a sample file then an automaton file
      # (Quickl::Help is raised otherwise). The signature computed by the
      # automaton on the sample is scored against the signature of the sample
      # itself, and the resulting scoring is printed.
      def execute(args)
        raise Quickl::Help unless args.size == 2

        sample_path    = assert_readable_file(args.first)
        sample         = Stamina::ADL.parse_sample_file(sample_path)
        automaton_path = assert_readable_file(args.last)
        automaton      = Stamina::ADL.parse_automaton_file(automaton_path)

        puts Scoring.scoring(automaton.signature(sample), sample.signature).to_s
      end

    end # class Score
  end # class Command
end # module Stamina
35
+