lernen 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +18 -0
  3. data/README.md +531 -28
  4. data/Rakefile +29 -7
  5. data/Steepfile +14 -0
  6. data/examples/ripper_prism.rb +63 -0
  7. data/examples/uri_parse_regexp.rb +73 -0
  8. data/lib/lernen/algorithm/cex_processor/acex.rb +43 -0
  9. data/lib/lernen/algorithm/cex_processor/prefix_transformer_acex.rb +43 -0
  10. data/lib/lernen/algorithm/cex_processor.rb +115 -0
  11. data/lib/lernen/algorithm/kearns_vazirani/discrimination_tree.rb +207 -0
  12. data/lib/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rb +100 -0
  13. data/lib/lernen/algorithm/kearns_vazirani.rb +44 -0
  14. data/lib/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rb +246 -0
  15. data/lib/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rb +89 -0
  16. data/lib/lernen/algorithm/kearns_vazirani_vpa.rb +35 -0
  17. data/lib/lernen/algorithm/learner.rb +82 -0
  18. data/lib/lernen/algorithm/lsharp/lsharp_learner.rb +367 -0
  19. data/lib/lernen/algorithm/lsharp/observation_tree.rb +115 -0
  20. data/lib/lernen/algorithm/lsharp.rb +43 -0
  21. data/lib/lernen/algorithm/lstar/lstar_learner.rb +49 -0
  22. data/lib/lernen/algorithm/lstar/observation_table.rb +214 -0
  23. data/lib/lernen/algorithm/lstar.rb +49 -0
  24. data/lib/lernen/algorithm/procedural/atr_manager.rb +200 -0
  25. data/lib/lernen/algorithm/procedural/procedural_learner.rb +223 -0
  26. data/lib/lernen/algorithm/procedural/procedural_sul.rb +47 -0
  27. data/lib/lernen/algorithm/procedural/return_indices_acex.rb +58 -0
  28. data/lib/lernen/algorithm/procedural.rb +57 -0
  29. data/lib/lernen/algorithm.rb +19 -0
  30. data/lib/lernen/automaton/dfa.rb +204 -0
  31. data/lib/lernen/automaton/mealy.rb +108 -0
  32. data/lib/lernen/automaton/moore.rb +122 -0
  33. data/lib/lernen/automaton/moore_like.rb +83 -0
  34. data/lib/lernen/automaton/proc_util.rb +93 -0
  35. data/lib/lernen/automaton/spa.rb +368 -0
  36. data/lib/lernen/automaton/transition_system.rb +209 -0
  37. data/lib/lernen/automaton/vpa.rb +300 -0
  38. data/lib/lernen/automaton.rb +19 -92
  39. data/lib/lernen/equiv/combined_oracle.rb +57 -0
  40. data/lib/lernen/equiv/exhaustive_search_oracle.rb +60 -0
  41. data/lib/lernen/equiv/moore_like_simulator_oracle.rb +36 -0
  42. data/lib/lernen/equiv/oracle.rb +109 -0
  43. data/lib/lernen/equiv/random_walk_oracle.rb +69 -0
  44. data/lib/lernen/equiv/random_well_matched_word_oracle.rb +139 -0
  45. data/lib/lernen/equiv/random_word_oracle.rb +71 -0
  46. data/lib/lernen/equiv/spa_simulator_oracle.rb +39 -0
  47. data/lib/lernen/equiv/test_words_oracle.rb +42 -0
  48. data/lib/lernen/equiv/transition_system_simulator_oracle.rb +36 -0
  49. data/lib/lernen/equiv/vpa_simulator_oracle.rb +48 -0
  50. data/lib/lernen/equiv.rb +25 -0
  51. data/lib/lernen/graph.rb +215 -0
  52. data/lib/lernen/system/block_sul.rb +41 -0
  53. data/lib/lernen/system/moore_like_simulator.rb +45 -0
  54. data/lib/lernen/system/moore_like_sul.rb +33 -0
  55. data/lib/lernen/system/sul.rb +126 -0
  56. data/lib/lernen/system/transition_system_simulator.rb +40 -0
  57. data/lib/lernen/system.rb +72 -0
  58. data/lib/lernen/version.rb +2 -1
  59. data/lib/lernen.rb +322 -13
  60. data/rbs_collection.lock.yaml +16 -0
  61. data/rbs_collection.yaml +14 -0
  62. data/renovate.json +6 -0
  63. data/sig/generated/lernen/algorithm/cex_processor/acex.rbs +30 -0
  64. data/sig/generated/lernen/algorithm/cex_processor/prefix_transformer_acex.rbs +27 -0
  65. data/sig/generated/lernen/algorithm/cex_processor.rbs +59 -0
  66. data/sig/generated/lernen/algorithm/kearns_vazirani/discrimination_tree.rbs +68 -0
  67. data/sig/generated/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rbs +51 -0
  68. data/sig/generated/lernen/algorithm/kearns_vazirani.rbs +32 -0
  69. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rbs +73 -0
  70. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rbs +51 -0
  71. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa.rbs +20 -0
  72. data/sig/generated/lernen/algorithm/learner.rbs +53 -0
  73. data/sig/generated/lernen/algorithm/lsharp/lsharp_learner.rbs +103 -0
  74. data/sig/generated/lernen/algorithm/lsharp/observation_tree.rbs +53 -0
  75. data/sig/generated/lernen/algorithm/lsharp.rbs +38 -0
  76. data/sig/generated/lernen/algorithm/lstar/lstar_learner.rbs +38 -0
  77. data/sig/generated/lernen/algorithm/lstar/observation_table.rbs +79 -0
  78. data/sig/generated/lernen/algorithm/lstar.rbs +37 -0
  79. data/sig/generated/lernen/algorithm/procedural/atr_manager.rbs +80 -0
  80. data/sig/generated/lernen/algorithm/procedural/procedural_learner.rbs +79 -0
  81. data/sig/generated/lernen/algorithm/procedural/procedural_sul.rbs +36 -0
  82. data/sig/generated/lernen/algorithm/procedural/return_indices_acex.rbs +33 -0
  83. data/sig/generated/lernen/algorithm/procedural.rbs +27 -0
  84. data/sig/generated/lernen/algorithm.rbs +10 -0
  85. data/sig/generated/lernen/automaton/dfa.rbs +93 -0
  86. data/sig/generated/lernen/automaton/mealy.rbs +61 -0
  87. data/sig/generated/lernen/automaton/moore.rbs +69 -0
  88. data/sig/generated/lernen/automaton/moore_like.rbs +63 -0
  89. data/sig/generated/lernen/automaton/proc_util.rbs +38 -0
  90. data/sig/generated/lernen/automaton/spa.rbs +125 -0
  91. data/sig/generated/lernen/automaton/transition_system.rbs +108 -0
  92. data/sig/generated/lernen/automaton/vpa.rbs +109 -0
  93. data/sig/generated/lernen/automaton.rbs +15 -0
  94. data/sig/generated/lernen/equiv/combined_oracle.rbs +27 -0
  95. data/sig/generated/lernen/equiv/exhaustive_search_oracle.rbs +38 -0
  96. data/sig/generated/lernen/equiv/moore_like_simulator_oracle.rbs +27 -0
  97. data/sig/generated/lernen/equiv/oracle.rbs +75 -0
  98. data/sig/generated/lernen/equiv/random_walk_oracle.rbs +41 -0
  99. data/sig/generated/lernen/equiv/random_well_matched_word_oracle.rbs +70 -0
  100. data/sig/generated/lernen/equiv/random_word_oracle.rbs +45 -0
  101. data/sig/generated/lernen/equiv/spa_simulator_oracle.rbs +30 -0
  102. data/sig/generated/lernen/equiv/test_words_oracle.rbs +20 -0
  103. data/sig/generated/lernen/equiv/transition_system_simulator_oracle.rbs +27 -0
  104. data/sig/generated/lernen/equiv/vpa_simulator_oracle.rbs +33 -0
  105. data/sig/generated/lernen/equiv.rbs +11 -0
  106. data/sig/generated/lernen/graph.rbs +80 -0
  107. data/sig/generated/lernen/system/block_sul.rbs +29 -0
  108. data/sig/generated/lernen/system/moore_like_simulator.rbs +31 -0
  109. data/sig/generated/lernen/system/moore_like_sul.rbs +28 -0
  110. data/sig/generated/lernen/system/sul.rbs +87 -0
  111. data/sig/generated/lernen/system/transition_system_simulator.rbs +28 -0
  112. data/sig/generated/lernen/system.rbs +62 -0
  113. data/sig/generated/lernen/version.rbs +6 -0
  114. data/sig/generated/lernen.rbs +214 -0
  115. data/sig-test/generated/test/example_test.rbs +14 -0
  116. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_test.rbs +16 -0
  117. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_vpa_test.rbs +10 -0
  118. data/sig-test/generated/test/lernen/algorithm/lsharp_test.rbs +16 -0
  119. data/sig-test/generated/test/lernen/algorithm/lstar_test.rbs +16 -0
  120. data/sig-test/generated/test/lernen/algorithm/procedural_test.rbs +10 -0
  121. data/sig-test/generated/test/lernen/automaton/dfa_test.rbs +19 -0
  122. data/sig-test/generated/test/lernen/automaton/mealy_test.rbs +19 -0
  123. data/sig-test/generated/test/lernen/automaton/moore_test.rbs +19 -0
  124. data/sig-test/generated/test/lernen/automaton/proc_util_test.rbs +19 -0
  125. data/sig-test/generated/test/lernen/automaton/spa_test.rbs +19 -0
  126. data/sig-test/generated/test/lernen/automaton/vpa_test.rbs +19 -0
  127. data/sig-test/generated/test/lernen/equiv/exhaustive_search_oracle_test.rbs +10 -0
  128. data/sig-test/generated/test/lernen/equiv/random_walk_oracle_test.rbs +10 -0
  129. data/sig-test/generated/test/lernen/equiv/random_word_oracle_test.rbs +10 -0
  130. data/sig-test/generated/test/lernen/system/block_sul_test.rbs +16 -0
  131. data/sig-test/generated/test/lernen/system/moore_like_simulator_test.rbs +16 -0
  132. data/sig-test/generated/test/lernen/system/transition_system_simulator_test.rbs +13 -0
  133. data/sig-test/generated/test/lernen/system_test.rbs +11 -0
  134. data/sig-test/generated/test/lernen_test.rbs +13 -0
  135. metadata +131 -11
  136. data/.yardopts +0 -3
  137. data/lib/lernen/cex_processor.rb +0 -61
  138. data/lib/lernen/kearns_vazirani.rb +0 -199
  139. data/lib/lernen/lsharp.rb +0 -335
  140. data/lib/lernen/lstar.rb +0 -169
  141. data/lib/lernen/oracle.rb +0 -116
  142. data/lib/lernen/sul.rb +0 -134
@@ -0,0 +1,63 @@
1
# frozen_string_literal: true

require "lernen"
require "prism"
require "ripper"

# Set this to `true` to also print Mermaid diagrams of the inferred VPAs.
shows_mermaid_diagram = false

# For reproducibility, we use a PRNG with a fixed seed.
seed = 41
random = Random.new(seed)

# `alphabet`, `call_alphabet`, and `return_alphabet` are arrays of pieces of words.
# The `alphabet` characters cause neither push nor pop,
# the `call_alphabet` characters cause a push onto a stack, and
# the `return_alphabet` characters cause a pop from a stack.
alphabet = %w["a" :] # rubocop:disable Lint/PercentStringArray
call_alphabet = %w[(]
return_alphabet = %w[)]

# `oracle` specifies the kind of equivalence oracle used for learning,
# and `oracle_params` is a parameter object for it.
oracle = :random_well_matched_word
oracle_params = { max_words: 2000 }.freeze

# When `call_alphabet` and `return_alphabet` are specified to `Lernen.learn`,
# it infers a VPA instead of an automaton.

# Ripper VPA:
puts "Infer Ripper VPA..."
ripper_vpa = Lernen.learn(alphabet:, call_alphabet:, return_alphabet:, oracle:, oracle_params:, random:) do |word|
  !Ripper.sexp(word.join).nil?
end

# Prism VPA:
puts "Infer Prism VPA..."
prism_vpa = Lernen.learn(alphabet:, call_alphabet:, return_alphabet:, oracle:, oracle_params:, random:) do |word|
  Prism.parse(word.join).success?
end

if shows_mermaid_diagram
  puts
  puts "=== Ripper VPA... ==="
  puts ripper_vpa.to_mermaid
  puts "====================="
  puts
  puts "=== Prism VPA... ===="
  puts prism_vpa.to_mermaid
  puts "======================"
  puts
end

puts "Check equivalence between Prism and Ripper VPAs..."
sep_word = Lernen::Automaton::VPA.find_separating_word(alphabet, call_alphabet, return_alphabet, ripper_vpa, prism_vpa)

if sep_word
  sep_str = sep_word.join
  puts "#{sep_str.inspect} is the separating word between Prism and Ripper VPAs."
  # Report with exactly the predicates the VPAs were learned from above
  # (`Ripper.sexp`, not `Ripper.parse`), so the printed values show the real disagreement.
  puts "   !Ripper.sexp(#{sep_str.inspect}).nil? = #{!Ripper.sexp(sep_str).nil?}"
  puts "Prism.parse(#{sep_str.inspect}).success? = #{Prism.parse(sep_str).success?}"
else
  puts "No separating word is found, so two VPAs are equivalent."
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lernen"
4
+ require "uri"
5
+
6
+ shows_mermaid_diagram = false
7
+
8
+ # Define validations using `URI.parse` and `URI` regexp.
9
+
10
# Returns `true` when `URI.parse` accepts `string` and the parsed scheme is
# `http` or `https`. Any `URI::Error` raised while parsing counts as invalid.
def valid_and_http_url?(string)
  %w[http https].include?(URI.parse(string).scheme)
rescue URI::Error
  false
end
16
+
17
# The `URI` parser's pattern restricted to the `http` and `https` schemes,
# anchored so that it has to match the whole string.
VALID_AND_HTTP_URL_REGEXP = /\A#{URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/

# Returns `true` when the whole `string` matches `VALID_AND_HTTP_URL_REGEXP`.
def new_valid_and_http_url?(string) = string.match?(VALID_AND_HTTP_URL_REGEXP)
21
+
22
# A PRNG with a fixed seed keeps the run reproducible.
random = Random.new(41)

# `alphabet` is an array of pieces of words.
# The learning algorithm infers an automaton over this alphabet, so in this case
# we list some possible subwords of URLs in `alphabet`.
alphabet = %w[http https ftp example com foo 80 12 : / . ? = & # @ %]

# `oracle` specifies the kind of equivalence oracle used for learning,
# and `oracle_params` is a parameter object for it.
oracle = :random_word
oracle_params = { max_words: 2000 }.freeze

# Keyword arguments shared by both `Lernen.learn` calls below.
learn_options = { alphabet:, oracle:, oracle_params:, random: }

# Infer an automaton by calling the `Lernen.learn` method with the target program.

# `URI.parse` DFA:
puts "Infer `URI.parse` DFA..."
uri_parse_dfa =
  Lernen.learn(**learn_options) do |word|
    # `word` is an array of `alphabet` elements, so it is joined into one string first.
    valid_and_http_url?(word.join)
  end

# `URI` regexp DFA:
puts "Infer `URI` regexp DFA..."
uri_regexp_dfa = Lernen.learn(**learn_options) { |word| new_valid_and_http_url?(word.join) }

if shows_mermaid_diagram
  diagrams = [
    ["=== `URI.parse` DFA... ====", uri_parse_dfa, "==========================="],
    ["=== `URI` regexp DFA... ===", uri_regexp_dfa, "=========================="]
  ]
  diagrams.each do |header, dfa, footer|
    puts
    puts header
    puts dfa.to_mermaid
    puts footer
  end
  puts
end

puts "Check equivalence between `URI.parse` and `URI` regexp DFAs..."
sep_word = Lernen::Automaton::DFA.find_separating_word(alphabet, uri_parse_dfa, uri_regexp_dfa)

if sep_word
  sep_str = sep_word.join
  shown = sep_str.inspect
  puts "#{shown} is the separating word between `URI.parse` and `URI` regexp DFAs."
  puts " valid_and_http_url?(#{shown}) = #{valid_and_http_url?(sep_str)}"
  puts "new_valid_and_http_url?(#{shown}) = #{new_valid_and_http_url?(sep_str)}"
else
  puts "No separating word is found, so two DFAs are equivalent."
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module CexProcessor
7
+ # Acex represents an abstract counterexample.
8
+ #
9
+ # Note that this class is *abstract*. We should implement the following method:
10
+ #
11
+ # - `#compute_effect(index)`
12
class Acex
  # @rbs @cache: Array[bool | nil]

  # Creates an abstract counterexample with `size` effect slots, none of them computed yet.
  #
  #: (Integer size) -> void
  def initialize(size)
    @cache = Array.new(size)
  end

  # Returns the number of effect slots.
  #
  #: () -> Integer
  def size
    @cache.size
  end

  # Returns the effect at `index`. The value is computed by `#compute_effect`
  # on first access and stored; a stored non-nil value (including `false`) is reused.
  #
  #: (Integer index) -> bool
  def effect(index)
    cached = @cache[index]
    return cached unless cached.nil?

    @cache[index] = compute_effect(index)
  end

  private

  # rubocop:disable Lint/UnusedMethodArgument

  # Computes the effect at `index`. This base implementation always raises;
  # subclasses are expected to override it.
  #
  #: (Integer index) -> bool
  def compute_effect(index)
    raise TypeError, "abstract method: `compute_effect`"
  end

  # rubocop:enable Lint/UnusedMethodArgument
end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module CexProcessor
7
+ # PrefixTransformerAcex is an implementation of `Acex` for classic prefix transformers.
8
+ #
9
+ # @rbs generic Conf
10
+ # @rbs generic In
11
+ # @rbs generic Out
12
class PrefixTransformerAcex < Acex
  # Stores the collaborators and records the last output of `hypothesis` on the
  # whole counterexample, which every effect is compared against.
  #
  #: (
  #    Array[In] cex,
  #    System::SUL[In, Out] sul,
  #    Automaton::TransitionSystem[Conf, In, Out] hypothesis,
  #    ^(Conf) -> Array[In] conf_to_prefix
  #  ) -> void
  def initialize(cex, sul, hypothesis, conf_to_prefix)
    super(cex.size)

    @cex = cex
    @sul = sul
    @hypothesis = hypothesis
    @conf_to_prefix = conf_to_prefix

    outputs, = @hypothesis.run(cex)
    @hypothesis_output = outputs.last
  end

  private

  # Splits the counterexample at `index`, replaces the head by the prefix that
  # `conf_to_prefix` gives for the configuration the hypothesis reaches on that head,
  # and tells whether the SUL's last output on the resulting word equals the
  # hypothesis output on the whole counterexample.
  #
  # @rbs override
  def compute_effect(index)
    head = @cex[...index]
    tail = @cex[index..]

    _, reached_conf = @hypothesis.run(head)
    transformed_word = @conf_to_prefix.call(reached_conf) + tail
    @sul.query_last(transformed_word) == @hypothesis_output
  end
end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ require "lernen/algorithm/cex_processor/acex"
5
+ require "lernen/algorithm/cex_processor/prefix_transformer_acex"
6
+
7
+ module Lernen
8
+ module Algorithm
9
+ # @rbs!
10
+ # type cex_processing_method = :linear | :binary | :exponential
11
+
12
+ # CexProcessor contains implementations of counterexample processing functions.
13
+ #
14
+ # A counterexample is a word that leads to the different result between
15
+ # a hypothesis automaton and a SUL (i.e., `hypothesis.run(cex)[0].last != sul.query_last(cex)`).
16
+ # Where `h[n] = conf_to_prefix[hypothesis.run(cex[0...n])[1]]`, there
17
+ # are some `n` (where `0 <= n < cex.size`) such that
18
+ # `sul.query_last(h[n] + cex[n...]) != sul.query_last(h[n + 1] + cex[n + 1...])`
19
+ # because `sul.query_last(cex) == sul.query_last(h[0] + cex[0...])` and
20
+ # `sul.query_last(h[cex.size] + cex[cex.size...]) == hypothesis.run(cex)[0].last`.
21
+ # Finding such a position `n` from `cex` is called "counterexample processing".
22
+ #
23
+ # The result `n` of counterexample processing has a good property for automata
24
+ # learning. Because `sul.query_last(h[n] + cex[n...]) != sul.query_last(h[n + 1] + cex[n + 1...])`,
25
+ # a prefix `h[n] + cex[n]` leads to a different state than a state of `h[n + 1]`
26
+ # with a suffix `cex[n + 1...]`.
27
+ #
28
+ # For counterexample processing, we can use a search approach such as
29
+ # linear or binary search. Using binary search for counterexample processing,
30
+ # it is known as the Rivest-Schapire (RS) optimization typically. For the more
31
+ # detailed information, please refer to [Isberner and Steffen (2014) "An Abstract
32
+ # Framework for Counterexample Analysis in Active Automata Learning"](https://proceedings.mlr.press/v34/isberner14a).
33
module CexProcessor
  # Processes a given counterexample in the `cex_processing` way.
  #
  # It returns `n` such that `acex.effect(n) != acex.effect(n + 1)`.
  # The search runs over the indices `0..acex.size - 1`.
  #
  #: (
  #    Acex acex,
  #    ?cex_processing: cex_processing_method
  #  ) -> Integer
  def self.process(acex, cex_processing: :binary)
    low = 0
    high = acex.size - 1
    case cex_processing
    in :linear
      process_linear(acex, low:, high:)
    in :binary
      process_binary(acex, low:, high:)
    in :exponential
      process_exponential(acex, low:, high:)
    end
  end

  # Processes a given counterexample by linear search.
  #
  # It scans from `low` upwards and returns the first index whose successor has
  # a different effect. It raises `ArgumentError` when all effects in
  # `low..high` are equal.
  #
  #: (Acex acex, low: Integer, high: Integer) -> Integer
  def self.process_linear(acex, low:, high:)
    prev_eff = acex.effect(low)
    index = low + 1
    while index <= high
      eff = acex.effect(index)
      return index - 1 if prev_eff != eff
      index += 1
      prev_eff = eff
    end

    raise ArgumentError, "no position with differing effects found between #{low} and #{high}"
  end

  # Processes a given counterexample by binary search.
  #
  # It is known as the Rivest-Schapire (RS) optimization.
  #
  # The loop keeps `acex.effect(low)` equal to the initial effect of `low`;
  # the result is only meaningful when `acex.effect(high)` differs from it.
  #
  #: (Acex acex, low: Integer, high: Integer) -> Integer
  def self.process_binary(acex, low:, high:)
    low_eff = acex.effect(low)

    while high - low > 1
      mid = low + ((high - low) / 2)
      mid_eff = acex.effect(mid)
      if low_eff == mid_eff
        low = mid
      else
        high = mid
      end
    end

    low
  end

  # Processes a given counterexample by exponential search.
  #
  # It probes indices at doubling offsets from `low` until the effect changes,
  # then finishes with binary search inside the bracket found so far.
  #
  #: (Acex acex, low: Integer, high: Integer) -> Integer
  def self.process_exponential(acex, low:, high:)
    ofs = 1
    low_eff = acex.effect(low)

    while low + ofs < high
      index = low + ofs
      eff = acex.effect(index)
      if low_eff != eff
        # The effect already differs at `index`, so a change lies in `low..index`;
        # narrow the upper bound instead of searching up to the original `high`.
        high = index
        break
      end
      low = index
      ofs *= 2
    end

    process_binary(acex, low:, high:)
  end

  private_class_method :process_linear, :process_binary, :process_exponential
end
114
+ end
115
+ end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module KearnsVazirani
7
+ # DiscriminationTree is an implementation of discrimination tree data structure.
8
+ #
9
+ # This data structure is used for Kearns-Vazirani algorithm.
10
+ #
11
+ # @rbs generic In -- Type for input alphabet
12
+ # @rbs generic Out -- Type for output values
13
class DiscriminationTree
  # @rbs skip
  Node = Data.define(:suffix, :branch)
  # @rbs skip
  Leaf = Data.define(:prefix)

  # @rbs!
  #   type tree[In, Out] = Node[In, Out] | Leaf[In]
  #
  #   class Node[In, Out] < Data
  #     attr_reader suffix: Array[In]
  #     attr_reader branch: Hash[Out, tree[In, Out]]
  #     def self.[]: [In, Out] (
  #       Array[In] suffix,
  #       Hash[Out, tree[In, Out]] branch
  #     ) -> Node[In, Out]
  #   end
  #
  #   class Leaf[In] < Data
  #     attr_reader prefix: Array[In]
  #     def self.[]: [In] (Array[In] prefix) -> Leaf[In]
  #   end

  # @rbs @alphabet: Array[In]
  # @rbs @sul: System::SUL[In, Out]
  # @rbs @automaton_type: :dfa | :mealy | :moore
  # @rbs @cex_processing: cex_processing_method
  # @rbs @path_hash: Hash[Array[In], Array[Out]]
  # @rbs @root: Node[In, Out]

  # Builds the initial tree: a root node with two leaves, one for the empty
  # prefix and one derived from the counterexample `cex`.
  #
  # `@path_hash` maps every leaf prefix to the list of outputs that leads from
  # the root to its leaf.
  #
  #: (
  #    Array[In] alphabet,
  #    System::SUL[In, Out] sul,
  #    cex: Array[In],
  #    automaton_type: :dfa | :mealy | :moore,
  #    cex_processing: cex_processing_method
  #  ) -> void
  def initialize(alphabet, sul, cex:, automaton_type:, cex_processing:)
    @alphabet = alphabet
    @sul = sul
    @automaton_type = automaton_type
    @cex_processing = cex_processing

    @path_hash = {}

    case automaton_type
    in :dfa | :moore
      # The root discriminates by the empty suffix, i.e. by the output of the prefix itself.
      @root = Node[[], {}]
      add_root_leaf([], sul.query_empty)
      add_root_leaf(cex, sul.query_last(cex))
    in :mealy
      # The root discriminates by the last input of `cex`.
      prefix = cex[...-1]
      suffix = [cex.last]
      @root = Node[suffix, {}]
      add_root_leaf([], sul.query_last(suffix))
      add_root_leaf(prefix, sul.query_last(cex))
    end
  end

  # Returns a prefix discriminated by `word`.
  #
  # It walks down from the root, at each node following the branch labelled with
  # the SUL's last output on `word + node.suffix`. When no such branch exists,
  # a new leaf for `word` is added there and `word` itself is returned.
  #
  #: (Array[In] word) -> Array[In]
  def sift(word)
    node = @root
    path = []

    loop do
      return node.prefix if node.is_a?(Leaf) # steep:ignore

      out = @sul.query_last(word + node.suffix)
      path << out

      node = # steep:ignore
        node.branch.fetch(out) do
          @path_hash[word] = path
          node.branch[out] = Leaf[word]
        end
    end
  end

  # Constructs a hypothesis automaton from this discrimination tree.
  #
  # States are numbered in breadth-first discovery order starting from the
  # empty prefix (state `0`). The second element of the result maps each state
  # to its prefix.
  #
  #: () -> [Automaton::TransitionSystem[Integer, In, Out], Hash[Integer, Array[In]]]
  def build_hypothesis
    transition_function = {}

    prefix_to_state = { [] => 0 }
    state_to_prefix = { 0 => [] }
    queue = [[]]

    while (prefix = queue.shift)
      state = prefix_to_state[prefix]
      @alphabet.each do |input|
        word = [*prefix, input]
        next_prefix = sift(word)

        # A prefix seen for the first time becomes a new state and is explored later.
        next_state =
          prefix_to_state.fetch(next_prefix) do
            queue << next_prefix
            state_to_prefix[state_to_prefix.size] = next_prefix
            prefix_to_state[next_prefix] = prefix_to_state.size
          end

        transition_function[[state, input]] =
          case @automaton_type
          in :dfa | :moore
            next_state
          in :mealy
            [@sul.query_last(word), next_state]
          end
      end
    end

    automaton =
      case @automaton_type
      in :dfa
        # The first output on a leaf's path is the one recorded at the root.
        accept_states = state_to_prefix.filter_map { |state, prefix| state if @path_hash[prefix].first }.to_set
        Automaton::DFA.new(0, accept_states, transition_function)
      in :moore
        outputs = state_to_prefix.to_h { |state, prefix| [state, @path_hash[prefix].first] }
        Automaton::Moore.new(0, outputs, transition_function)
      in :mealy
        Automaton::Mealy.new(0, transition_function)
      end

    [automaton, state_to_prefix]
  end

  # Updates this discrimination tree by the given `cex`.
  #
  # The counterexample is processed to a position `n`; the leaf of the state that
  # the hypothesis reaches by `cex[n]` is replaced with a new node discriminating
  # by `cex[n + 1...]`, whose two leaves are the new prefix and the replaced one.
  #
  #: (
  #    Array[In] cex,
  #    Automaton::TransitionSystem[Integer, In, Out] hypothesis,
  #    Hash[Integer, Array[In]] state_to_prefix
  #  ) -> void
  def refine_hypothesis(cex, hypothesis, state_to_prefix)
    prefix_of = ->(state) { state_to_prefix[state] }
    acex = CexProcessor::PrefixTransformerAcex.new(cex, @sul, hypothesis, prefix_of)

    n = CexProcessor.process(acex, cex_processing: @cex_processing)
    new_input = cex[n]
    new_suffix = cex[(n + 1)..]

    _, old_state = hypothesis.run(cex[...n]) # steep:ignore
    _, replace_state = hypothesis.step(old_state, new_input)

    new_prefix = [*state_to_prefix[old_state], new_input]
    new_out = @sul.query_last(new_prefix + new_suffix) # steep:ignore

    replace_prefix = state_to_prefix[replace_state]
    replace_out = @sul.query_last(replace_prefix + new_suffix) # steep:ignore

    # Follow all but the last output of the recorded path to reach the parent
    # of the leaf that is going to be replaced.
    replace_node_path = @path_hash[replace_prefix]
    *parent_path, last_out = replace_node_path
    parent = parent_path.reduce(@root) { |node, out| node.branch[out] } # steep:ignore

    new_node = Node[new_suffix, {}] # steep:ignore
    parent.branch[last_out] = new_node # steep:ignore

    new_node.branch[new_out] = Leaf[new_prefix]
    @path_hash[new_prefix] = [*replace_node_path, new_out] # steep:ignore

    new_node.branch[replace_out] = Leaf[replace_prefix]
    @path_hash[replace_prefix] = [*replace_node_path, replace_out] # steep:ignore
  end

  private

  # Adds a leaf for `prefix` directly under the root on the branch `out`
  # and records its one-element path.
  #
  #: (Array[In] prefix, Out out) -> void
  def add_root_leaf(prefix, out)
    @root.branch[out] = Leaf[prefix]
    @path_hash[prefix] = [out]
  end
end
205
+ end
206
+ end
207
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module KearnsVazirani
7
+ # KearnsVaziraniLearner is an implementation of the Kearns-Vazirani algorithm.
8
+ #
9
+ # Kearns-Vazirani is introduced by [Kearns & Vazirani (1994) "An Introduction to
10
+ # Computational Learning Theory"](https://direct.mit.edu/books/monograph/2604/An-Introduction-to-Computational-Learning-Theory).
11
+ #
12
+ # @rbs generic In -- Type for input alphabet
13
+ # @rbs generic Out -- Type for output values
14
class KearnsVaziraniLearner < Learner #[In, Out]
  # @rbs @alphabet: Array[In]
  # @rbs @sul: System::SUL[In, Out]
  # @rbs @oracle: Equiv::Oracle[In, Out]
  # @rbs @automaton_type: :dfa | :moore | :mealy
  # @rbs @cex_processing: cex_processing_method
  # @rbs @tree: DiscriminationTree[In, Out] | nil

  # Keeps a private copy of `alphabet`; the discrimination tree is created
  # lazily on the first call to `#refine_hypothesis`.
  #
  #: (
  #    Array[In] alphabet, System::SUL[In, Out] sul,
  #    automaton_type: :dfa | :moore | :mealy,
  #    ?cex_processing: cex_processing_method
  #  ) -> void
  def initialize(alphabet, sul, automaton_type:, cex_processing: :binary)
    super()

    @alphabet = alphabet.dup
    @sul = sul
    @automaton_type = automaton_type
    @cex_processing = cex_processing

    @tree = nil
  end

  # Appends `input` to this learner's alphabet.
  #
  # @rbs override
  def add_alphabet(input)
    @alphabet << input
  end

  # Delegates to the discrimination tree when it exists; before that, returns
  # the one-state first hypothesis together with the mapping `{ 0 => [] }`.
  #
  # @rbs override
  def build_hypothesis
    tree = @tree
    tree ? tree.build_hypothesis : [build_first_hypothesis, { 0 => [] }]
  end

  # The first counterexample creates the discrimination tree; every later one
  # is handed to the existing tree.
  #
  # @rbs override
  def refine_hypothesis(cex, hypothesis, state_to_prefix)
    tree = @tree
    if tree.nil?
      @tree =
        DiscriminationTree.new(
          @alphabet,
          @sul,
          cex:,
          automaton_type: @automaton_type,
          cex_processing: @cex_processing
        )
    else
      tree.refine_hypothesis(cex, hypothesis, state_to_prefix)
      nil
    end
  end

  private

  # Constructs the first hypothesis automaton.
  #
  # It has the single state `0` with a self-loop for every input.
  #
  #: () -> Automaton::TransitionSystem[Integer, In, Out]
  def build_first_hypothesis
    transition_function =
      @alphabet.to_h do |input|
        target =
          case @automaton_type
          in :dfa | :moore
            0
          in :mealy
            [@sul.query_last([input]), 0]
          end
        [[0, input], target]
      end

    case @automaton_type
    in :dfa
      accept_state_set = Set.new
      accept_state_set << 0 if @sul.query_empty
      Automaton::DFA.new(0, accept_state_set, transition_function)
    in :moore
      Automaton::Moore.new(0, { 0 => @sul.query_empty }, transition_function)
    in :mealy
      Automaton::Mealy.new(0, transition_function)
    end
  end
end
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ require "lernen/algorithm/kearns_vazirani/discrimination_tree"
5
+ require "lernen/algorithm/kearns_vazirani/kearns_vazirani_learner"
6
+
7
+ module Lernen
8
+ module Algorithm
9
+ # KearnsVazirani provides an implementation of the Kearns-Vazirani algorithm.
10
+ #
11
+ # Kearns-Vazirani is introduced by [Kearns & Vazirani (1994) "An Introduction to
12
+ # Computational Learning Theory"](https://direct.mit.edu/books/monograph/2604/An-Introduction-to-Computational-Learning-Theory).
13
module KearnsVazirani
  # Runs Kearns-Vazirani algorithm and returns an inferred automaton.
  #
  # It builds a `KearnsVaziraniLearner` from the given arguments and returns the
  # result of its `learn` call with `oracle` and `max_learning_rounds`.
  #
  #: [In] (
  #    Array[In] alphabet, System::SUL[In, bool] sul, Equiv::Oracle[In, bool] oracle,
  #    automaton_type: :dfa,
  #    ?cex_processing: cex_processing_method, ?max_learning_rounds: Integer | nil
  #  ) -> Automaton::DFA[In]
  #: [In, Out] (
  #    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
  #    automaton_type: :mealy,
  #    ?cex_processing: cex_processing_method, ?max_learning_rounds: Integer | nil
  #  ) -> Automaton::Mealy[In, Out]
  #: [In, Out] (
  #    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
  #    automaton_type: :moore,
  #    ?cex_processing: cex_processing_method, ?max_learning_rounds: Integer | nil
  #  ) -> Automaton::Moore[In, Out]
  def self.learn( # steep:ignore
    alphabet,
    sul,
    oracle,
    automaton_type:,
    cex_processing: :binary,
    max_learning_rounds: nil
  )
    KearnsVaziraniLearner
      .new(alphabet, sul, automaton_type:, cex_processing:)
      .learn(oracle, max_learning_rounds:)
  end
end
43
+ end
44
+ end