lernen 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +18 -0
  3. data/README.md +531 -28
  4. data/Rakefile +29 -7
  5. data/Steepfile +14 -0
  6. data/examples/ripper_prism.rb +63 -0
  7. data/examples/uri_parse_regexp.rb +73 -0
  8. data/lib/lernen/algorithm/cex_processor/acex.rb +43 -0
  9. data/lib/lernen/algorithm/cex_processor/prefix_transformer_acex.rb +43 -0
  10. data/lib/lernen/algorithm/cex_processor.rb +115 -0
  11. data/lib/lernen/algorithm/kearns_vazirani/discrimination_tree.rb +207 -0
  12. data/lib/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rb +100 -0
  13. data/lib/lernen/algorithm/kearns_vazirani.rb +44 -0
  14. data/lib/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rb +246 -0
  15. data/lib/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rb +89 -0
  16. data/lib/lernen/algorithm/kearns_vazirani_vpa.rb +35 -0
  17. data/lib/lernen/algorithm/learner.rb +82 -0
  18. data/lib/lernen/algorithm/lsharp/lsharp_learner.rb +367 -0
  19. data/lib/lernen/algorithm/lsharp/observation_tree.rb +115 -0
  20. data/lib/lernen/algorithm/lsharp.rb +43 -0
  21. data/lib/lernen/algorithm/lstar/lstar_learner.rb +49 -0
  22. data/lib/lernen/algorithm/lstar/observation_table.rb +214 -0
  23. data/lib/lernen/algorithm/lstar.rb +49 -0
  24. data/lib/lernen/algorithm/procedural/atr_manager.rb +200 -0
  25. data/lib/lernen/algorithm/procedural/procedural_learner.rb +223 -0
  26. data/lib/lernen/algorithm/procedural/procedural_sul.rb +47 -0
  27. data/lib/lernen/algorithm/procedural/return_indices_acex.rb +58 -0
  28. data/lib/lernen/algorithm/procedural.rb +57 -0
  29. data/lib/lernen/algorithm.rb +19 -0
  30. data/lib/lernen/automaton/dfa.rb +204 -0
  31. data/lib/lernen/automaton/mealy.rb +108 -0
  32. data/lib/lernen/automaton/moore.rb +122 -0
  33. data/lib/lernen/automaton/moore_like.rb +83 -0
  34. data/lib/lernen/automaton/proc_util.rb +93 -0
  35. data/lib/lernen/automaton/spa.rb +368 -0
  36. data/lib/lernen/automaton/transition_system.rb +209 -0
  37. data/lib/lernen/automaton/vpa.rb +300 -0
  38. data/lib/lernen/automaton.rb +19 -92
  39. data/lib/lernen/equiv/combined_oracle.rb +57 -0
  40. data/lib/lernen/equiv/exhaustive_search_oracle.rb +60 -0
  41. data/lib/lernen/equiv/moore_like_simulator_oracle.rb +36 -0
  42. data/lib/lernen/equiv/oracle.rb +109 -0
  43. data/lib/lernen/equiv/random_walk_oracle.rb +69 -0
  44. data/lib/lernen/equiv/random_well_matched_word_oracle.rb +139 -0
  45. data/lib/lernen/equiv/random_word_oracle.rb +71 -0
  46. data/lib/lernen/equiv/spa_simulator_oracle.rb +39 -0
  47. data/lib/lernen/equiv/test_words_oracle.rb +42 -0
  48. data/lib/lernen/equiv/transition_system_simulator_oracle.rb +36 -0
  49. data/lib/lernen/equiv/vpa_simulator_oracle.rb +48 -0
  50. data/lib/lernen/equiv.rb +25 -0
  51. data/lib/lernen/graph.rb +215 -0
  52. data/lib/lernen/system/block_sul.rb +41 -0
  53. data/lib/lernen/system/moore_like_simulator.rb +45 -0
  54. data/lib/lernen/system/moore_like_sul.rb +33 -0
  55. data/lib/lernen/system/sul.rb +126 -0
  56. data/lib/lernen/system/transition_system_simulator.rb +40 -0
  57. data/lib/lernen/system.rb +72 -0
  58. data/lib/lernen/version.rb +2 -1
  59. data/lib/lernen.rb +322 -13
  60. data/rbs_collection.lock.yaml +16 -0
  61. data/rbs_collection.yaml +14 -0
  62. data/renovate.json +6 -0
  63. data/sig/generated/lernen/algorithm/cex_processor/acex.rbs +30 -0
  64. data/sig/generated/lernen/algorithm/cex_processor/prefix_transformer_acex.rbs +27 -0
  65. data/sig/generated/lernen/algorithm/cex_processor.rbs +59 -0
  66. data/sig/generated/lernen/algorithm/kearns_vazirani/discrimination_tree.rbs +68 -0
  67. data/sig/generated/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rbs +51 -0
  68. data/sig/generated/lernen/algorithm/kearns_vazirani.rbs +32 -0
  69. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rbs +73 -0
  70. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rbs +51 -0
  71. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa.rbs +20 -0
  72. data/sig/generated/lernen/algorithm/learner.rbs +53 -0
  73. data/sig/generated/lernen/algorithm/lsharp/lsharp_learner.rbs +103 -0
  74. data/sig/generated/lernen/algorithm/lsharp/observation_tree.rbs +53 -0
  75. data/sig/generated/lernen/algorithm/lsharp.rbs +38 -0
  76. data/sig/generated/lernen/algorithm/lstar/lstar_learner.rbs +38 -0
  77. data/sig/generated/lernen/algorithm/lstar/observation_table.rbs +79 -0
  78. data/sig/generated/lernen/algorithm/lstar.rbs +37 -0
  79. data/sig/generated/lernen/algorithm/procedural/atr_manager.rbs +80 -0
  80. data/sig/generated/lernen/algorithm/procedural/procedural_learner.rbs +79 -0
  81. data/sig/generated/lernen/algorithm/procedural/procedural_sul.rbs +36 -0
  82. data/sig/generated/lernen/algorithm/procedural/return_indices_acex.rbs +33 -0
  83. data/sig/generated/lernen/algorithm/procedural.rbs +27 -0
  84. data/sig/generated/lernen/algorithm.rbs +10 -0
  85. data/sig/generated/lernen/automaton/dfa.rbs +93 -0
  86. data/sig/generated/lernen/automaton/mealy.rbs +61 -0
  87. data/sig/generated/lernen/automaton/moore.rbs +69 -0
  88. data/sig/generated/lernen/automaton/moore_like.rbs +63 -0
  89. data/sig/generated/lernen/automaton/proc_util.rbs +38 -0
  90. data/sig/generated/lernen/automaton/spa.rbs +125 -0
  91. data/sig/generated/lernen/automaton/transition_system.rbs +108 -0
  92. data/sig/generated/lernen/automaton/vpa.rbs +109 -0
  93. data/sig/generated/lernen/automaton.rbs +15 -0
  94. data/sig/generated/lernen/equiv/combined_oracle.rbs +27 -0
  95. data/sig/generated/lernen/equiv/exhaustive_search_oracle.rbs +38 -0
  96. data/sig/generated/lernen/equiv/moore_like_simulator_oracle.rbs +27 -0
  97. data/sig/generated/lernen/equiv/oracle.rbs +75 -0
  98. data/sig/generated/lernen/equiv/random_walk_oracle.rbs +41 -0
  99. data/sig/generated/lernen/equiv/random_well_matched_word_oracle.rbs +70 -0
  100. data/sig/generated/lernen/equiv/random_word_oracle.rbs +45 -0
  101. data/sig/generated/lernen/equiv/spa_simulator_oracle.rbs +30 -0
  102. data/sig/generated/lernen/equiv/test_words_oracle.rbs +20 -0
  103. data/sig/generated/lernen/equiv/transition_system_simulator_oracle.rbs +27 -0
  104. data/sig/generated/lernen/equiv/vpa_simulator_oracle.rbs +33 -0
  105. data/sig/generated/lernen/equiv.rbs +11 -0
  106. data/sig/generated/lernen/graph.rbs +80 -0
  107. data/sig/generated/lernen/system/block_sul.rbs +29 -0
  108. data/sig/generated/lernen/system/moore_like_simulator.rbs +31 -0
  109. data/sig/generated/lernen/system/moore_like_sul.rbs +28 -0
  110. data/sig/generated/lernen/system/sul.rbs +87 -0
  111. data/sig/generated/lernen/system/transition_system_simulator.rbs +28 -0
  112. data/sig/generated/lernen/system.rbs +62 -0
  113. data/sig/generated/lernen/version.rbs +6 -0
  114. data/sig/generated/lernen.rbs +214 -0
  115. data/sig-test/generated/test/example_test.rbs +14 -0
  116. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_test.rbs +16 -0
  117. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_vpa_test.rbs +10 -0
  118. data/sig-test/generated/test/lernen/algorithm/lsharp_test.rbs +16 -0
  119. data/sig-test/generated/test/lernen/algorithm/lstar_test.rbs +16 -0
  120. data/sig-test/generated/test/lernen/algorithm/procedural_test.rbs +10 -0
  121. data/sig-test/generated/test/lernen/automaton/dfa_test.rbs +19 -0
  122. data/sig-test/generated/test/lernen/automaton/mealy_test.rbs +19 -0
  123. data/sig-test/generated/test/lernen/automaton/moore_test.rbs +19 -0
  124. data/sig-test/generated/test/lernen/automaton/proc_util_test.rbs +19 -0
  125. data/sig-test/generated/test/lernen/automaton/spa_test.rbs +19 -0
  126. data/sig-test/generated/test/lernen/automaton/vpa_test.rbs +19 -0
  127. data/sig-test/generated/test/lernen/equiv/exhaustive_search_oracle_test.rbs +10 -0
  128. data/sig-test/generated/test/lernen/equiv/random_walk_oracle_test.rbs +10 -0
  129. data/sig-test/generated/test/lernen/equiv/random_word_oracle_test.rbs +10 -0
  130. data/sig-test/generated/test/lernen/system/block_sul_test.rbs +16 -0
  131. data/sig-test/generated/test/lernen/system/moore_like_simulator_test.rbs +16 -0
  132. data/sig-test/generated/test/lernen/system/transition_system_simulator_test.rbs +13 -0
  133. data/sig-test/generated/test/lernen/system_test.rbs +11 -0
  134. data/sig-test/generated/test/lernen_test.rbs +13 -0
  135. metadata +131 -11
  136. data/.yardopts +0 -3
  137. data/lib/lernen/cex_processor.rb +0 -61
  138. data/lib/lernen/kearns_vazirani.rb +0 -199
  139. data/lib/lernen/lsharp.rb +0 -335
  140. data/lib/lernen/lstar.rb +0 -169
  141. data/lib/lernen/oracle.rb +0 -116
  142. data/lib/lernen/sul.rb +0 -134
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module LStar
7
+ # ObservationTable is an implementation of the observation table data structure.
8
+ #
9
+ # This data structure is used for Angluin's L* algorithm.
10
+ #
11
+ # @rbs generic In -- Type for input alphabet
12
+ # @rbs generic Out -- Type for output values
13
+ class ObservationTable
14
+ # @rbs @alphabet: Array[In]
15
+ # @rbs @sul: System::SUL[In, Out]
16
+ # @rbs @automaton_type: Automaton::transition_system_type
17
+ # @rbs @cex_processing: cex_processing_method | nil
18
+ # @rbs @prefixes: Array[Array[In]]
19
+ # @rbs @suffixes: Array[Array[In]]
20
+ # @rbs @table: Hash[Array[In], Array[Out]]
21
+
22
# Creates an observation table over `alphabet` that queries `sul`.
#
# The table starts with the empty word as its only prefix and with no rows.
# The initial suffixes depend on `automaton_type`: the empty word for `:dfa`
# and `:moore`, and one single-input word per alphabet symbol (in alphabet
# order) for `:mealy`. Any other value raises `NoMatchingPatternError`.
#
#: (
#    Array[In] alphabet,
#    System::SUL[In, Out] sul,
#    automaton_type: :dfa | :moore | :mealy,
#    cex_processing: cex_processing_method | nil
#  ) -> void
def initialize(alphabet, sul, automaton_type:, cex_processing:)
  @alphabet = alphabet
  @sul = sul
  @automaton_type = automaton_type
  @cex_processing = cex_processing

  @prefixes = [[]]
  @table = {}
  @suffixes =
    case automaton_type
    in :dfa | :moore
      [[]]
    in :mealy
      alphabet.map { |input| [input] }
    end
end
45
+
46
# Builds a hypothesis automaton from the current contents of this table.
#
# The table is first made consistent and closed. Every prefix then becomes a
# state numbered by its position in `@prefixes`, and the target of a
# transition is found by looking up the row of the prefix extended by the
# input. For `:mealy`, the transition output is read from the prefix's row at
# the input's position in the alphabet, which relies on the first suffixes
# being the single-input words in alphabet order.
#
# Returns the automaton together with the state-number-to-prefix mapping.
#
#: () -> [Automaton::TransitionSystem[Integer, In, Out], Hash[Integer, Array[In]]]
def build_hypothesis
  make_consistent_and_closed

  state_to_prefix = {}
  row_to_state = {}
  @prefixes.each_with_index do |prefix, state|
    state_to_prefix[state] = prefix
    # When several prefixes share a row, the last one wins.
    row_to_state[@table[prefix]] = state
  end

  transition_function = {}
  @prefixes.each_with_index do |prefix, state|
    @alphabet.each_with_index do |input, index|
      case @automaton_type
      in :moore | :dfa
        next_state = row_to_state[@table[prefix + [input]]]
        transition_function[[state, input]] = next_state
      in :mealy
        output = @table[prefix][index]
        next_state = row_to_state[@table[prefix + [input]]]
        transition_function[[state, input]] = [output, next_state]
      end
    end
  end

  automaton =
    case @automaton_type
    in :dfa
      # A state is accepting when the first entry of its row is truthy.
      accept_state_set = state_to_prefix.filter_map { |state, prefix| state if @table[prefix][0] }.to_set
      Automaton::DFA.new(0, accept_state_set, transition_function)
    in :moore
      outputs = state_to_prefix.transform_values { |prefix| @table[prefix][0] }
      Automaton::Moore.new(0, outputs, transition_function)
    in :mealy
      Automaton::Mealy.new(0, transition_function)
    end

  [automaton, state_to_prefix]
end
82
+
83
# Extends this observation table using the counterexample `cex`.
#
# With a counterexample processing method configured, `CexProcessor.process`
# yields a split index `n`. The prefix of the state reached by `cex[0...n]`,
# extended by `cex[n]`, is added as a new prefix, and the rest of `cex` is
# added as a new suffix. Without one (`nil`), every prefix of `cex`
# (including the empty word and `cex` itself) is added as a prefix.
#
#: (
#    Array[In] cex,
#    Automaton::TransitionSystem[Integer, In, Out] hypothesis,
#    Hash[Integer, Array[In]] state_to_prefix
#  ) -> void
def refine_hypothesis(cex, hypothesis, state_to_prefix)
  cex_processing = @cex_processing
  if cex_processing
    prefix_of = ->(state) { state_to_prefix[state] }
    acex = CexProcessor::PrefixTransformerAcex.new(cex, @sul, hypothesis, prefix_of)
    split = CexProcessor.process(acex, cex_processing:)

    _, old_state = hypothesis.run(cex[0...split]) # steep:ignore
    new_prefix = state_to_prefix[old_state] + [cex[split]]
    new_suffix = cex[(split + 1)..]

    @prefixes << new_prefix unless @prefixes.include?(new_prefix)
    @suffixes << new_suffix unless @suffixes.include?(new_suffix) # steep:ignore
  else
    (0..cex.size).map { |length| cex[0...length] }.each do |prefix|
      @prefixes << prefix unless @prefixes.include?(prefix) # steep:ignore
    end
  end
end
112
+
113
+ private
114
+
115
# Collects one-input extensions of the current prefixes whose rows are not
# the row of any prefix.
#
# Only the first extension seen for each such row is kept. Returns `nil` when
# the table is already closed; otherwise returns the extensions ordered from
# longest to shortest.
#
#: () -> (Array[Array[In]] | nil)
def find_prefixes_to_close
  known_rows = @prefixes.to_set { |prefix| @table[prefix] }
  pending_rows = Set.new
  candidates = []

  @prefixes.each do |prefix|
    @alphabet.each do |input|
      extended = prefix + [input]
      row = @table[extended]
      next if known_rows.include?(row) || pending_rows.include?(row)

      candidates << extended
      pending_rows << row
    end
  end

  return if candidates.empty?

  candidates.sort_by!(&:size).reverse!
end
139
+
140
# Looks for an inconsistency: two prefixes with equal rows whose extensions
# by the same input have different rows.
#
# Returns the distinguishing suffix to add, formed by the input followed by
# the first existing suffix on which the extended rows differ, or `nil` when
# no inconsistency is found.
#
#: () -> (Array[In] | nil)
def check_consistency
  @prefixes.combination(2) do |left, right|
    next if @table[left] != @table[right] # steep:ignore

    @alphabet.each do |input|
      left_row = @table[left + [input]] # steep:ignore
      right_row = @table[right + [input]] # steep:ignore
      next if left_row == right_row

      index = @suffixes.each_index.find { |i| left_row[i] != right_row[i] } # steep:ignore
      return [input] + @suffixes[index] if index
    end
  end

  nil
end
163
+
164
# Fills in the rows of every prefix and of every one-input extension of a
# prefix, so that each of those rows has one entry per current suffix.
#
#: () -> void
def update_table
  @prefixes.each do |prefix|
    extensions = @alphabet.map { |input| prefix + [input] }
    [prefix, *extensions].each { |word| update_table_row(word) }
  end
end
174
+
175
# Completes the row for `prefix`, creating it if it does not exist yet.
#
# Only the suffixes that have no entry in the row yet are queried. The empty
# word is answered by `query_empty` when the SUL is a `System::MooreLikeSUL`;
# every other word is answered by `query_last`.
#
#: (Array[In] prefix) -> void
def update_table_row(prefix)
  row = (@table[prefix] ||= [])
  return if row.size == @suffixes.size

  @suffixes[row.size..].each do |suffix| # steep:ignore
    word = prefix + suffix
    sul = @sul
    output =
      if word.empty? && sul.is_a?(System::MooreLikeSUL)
        sul.query_empty
      else
        sul.query_last(word)
      end
    row << output
  end
end
188
+
189
# Repeatedly extends this table until it is consistent and closed.
#
# Consistency is only enforced when no counterexample processing method is
# configured (`@cex_processing` is `nil`): each reported suffix is added and
# the table refreshed until none is reported. After that, unclosed prefixes
# are added and the table refreshed until none remains.
#
#: () -> void
def make_consistent_and_closed
  update_table

  if @cex_processing.nil?
    while (new_suffix = check_consistency)
      @suffixes << new_suffix
      update_table
    end
  end

  while (new_prefixes = find_prefixes_to_close)
    @prefixes.concat(new_prefixes)
    update_table
  end
end
211
+ end
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ require "lernen/algorithm/lstar/observation_table"
5
+ require "lernen/algorithm/lstar/lstar_learner"
6
+
7
+ module Lernen
8
+ module Algorithm
9
+ # LStar provides an implementation of Angluin's L* algorithm.
10
+ #
11
+ # Angluin's L* is introduced by [Angluin (1987) "Learning Regular Sets from
12
+ # Queries and Counterexamples"](https://dl.acm.org/doi/10.1016/0890-5401%2887%2990052-6).
13
+ module LStar
14
# Runs Angluin's L* algorithm and returns the inferred automaton.
#
# `cex_processing` selects the method of counterexample processing.
# In addition to the predefined `cex_processing_method` values, `nil` can be
# given as `cex_processing`. When `cex_processing: nil` is specified, the
# original counterexample processing described in the Angluin paper is used.
#
# The work is delegated to an `LStarLearner` built from the given arguments.
#
#: [In] (
#    Array[In] alphabet, System::SUL[In, bool] sul, Equiv::Oracle[In, bool] oracle,
#    automaton_type: :dfa,
#    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
#  ) -> Automaton::DFA[In]
#: [In, Out] (
#    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
#    automaton_type: :mealy,
#    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
#  ) -> Automaton::Mealy[In, Out]
#: [In, Out] (
#    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
#    automaton_type: :moore,
#    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
#  ) -> Automaton::Moore[In, Out]
def self.learn( # steep:ignore
  alphabet,
  sul,
  oracle,
  automaton_type:,
  cex_processing: :binary,
  max_learning_rounds: nil
)
  LStarLearner.new(alphabet, sul, automaton_type:, cex_processing:).learn(oracle, max_learning_rounds:)
end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module Procedural
7
+ # ATRManager is a collection to manage access, terminating, and return sequences.
8
+ #
9
+ # @rbs generic In -- Type for input alphabet
10
+ # @rbs generic Call -- Type for call alphabet
11
+ # @rbs generic Return -- Type for return alphabet
12
+ class ATRManager
13
+ # @rbs @alphabet: Array[In]
14
+ # @rbs @call_alphabet_set: Set[Call]
15
+ # @rbs @return_input: Return
16
+ # @rbs @scan_procs: bool
17
+
18
+ # @rbs @proc_to_access_sequence: Hash[Call, Array[In | Call | Return]]
19
+ # @rbs @proc_to_terminating_sequence: Hash[Call, Array[In | Call | Return]]
20
+ # @rbs @proc_to_return_sequence: Hash[Call, Array[In | Call | Return]]
21
+
22
# Creates a manager with no known access, terminating, or return sequences.
#
# `call_alphabet` is stored as a set for membership tests. `scan_procs`
# controls whether `#scan_procs` does any work.
#
#: (
#    Array[In] alphabet,
#    Array[Call] call_alphabet,
#    Return return_input,
#    ?scan_procs: bool
#  ) -> void
def initialize(alphabet, call_alphabet, return_input, scan_procs: true)
  @alphabet = alphabet
  @call_alphabet_set = call_alphabet.to_set
  @return_input = return_input
  @scan_procs = scan_procs

  # All three maps are keyed by call symbol and filled in while scanning.
  @proc_to_access_sequence = {}
  @proc_to_terminating_sequence = {}
  @proc_to_return_sequence = {}
end
38
+
39
+ attr_reader :proc_to_access_sequence #: Hash[Call, Array[In | Call | Return]]
40
+ attr_reader :proc_to_terminating_sequence #: Hash[Call, Array[In | Call | Return]]
41
+ attr_reader :proc_to_return_sequence #: Hash[Call, Array[In | Call | Return]]
42
+
43
# Scans the counterexample `cex` for shorter or new sequences.
#
# Terminating sequences are extracted first, then access and return
# sequences. Returns the call symbols that had no terminating sequence
# before this scan.
#
#: (Array[In | Call | Return] cex) -> Array[Call]
def scan_positive_cex(cex)
  extract_potential_terminating_sequences(cex).tap do
    extract_potential_access_and_return_sequences(cex)
  end
end
49
+
50
# Tries to shorten the known terminating sequences using the hypothesis DFAs.
#
# Does nothing when scanning was disabled at construction time. For each
# procedure, the shortest expansion of an accepting state's prefix replaces
# the stored terminating sequence when it is strictly shorter. The pass over
# all procedures is repeated until nothing changes, because an expansion
# depends on the stored terminating sequences. If anything changed, all
# stored sequences are minified afterwards.
#
#: (
#    Hash[Call, Automaton::DFA[In | Call]] proc_to_dfa,
#    Hash[Call, Hash[Integer, Array[In | Call]]] proc_to_state_to_prefix
#  ) -> void
def scan_procs(proc_to_dfa, proc_to_state_to_prefix)
  return unless @scan_procs

  updated = false
  loop do
    changed = false
    proc_to_dfa.each do |proc, dfa|
      current = @proc_to_terminating_sequence[proc]
      state_to_prefix = proc_to_state_to_prefix[proc]
      candidate =
        dfa.accept_state_set.to_a.map { |accept_state| expand(state_to_prefix[accept_state]) }.min_by(&:size)

      # No accepting state means there is no candidate for this procedure.
      next unless candidate
      next if current.size <= candidate.size

      @proc_to_terminating_sequence[proc] = candidate
      changed = true
    end

    break unless changed

    updated = true
  end

  return unless updated

  optimize_sequences(@proc_to_terminating_sequence)
  optimize_sequences(@proc_to_access_sequence)
  optimize_sequences(@proc_to_return_sequence)
end
82
+
83
# Wraps the local `word` of procedure `proc` into a global word: the stored
# access sequence of `proc`, then the expansion of `word`, then the stored
# return sequence of `proc`.
#
#: (Call proc, Array[In | Call] word) -> Array[In | Call | Return]
def embed(proc, word)
  [
    *@proc_to_access_sequence[proc],
    *expand(word),
    *@proc_to_return_sequence[proc]
  ]
end
90
+
91
# Expands `word` via `Automaton::ProcUtil.expand`, passing this manager's
# return symbol and its current terminating sequences.
#
#: [In, Call, Return] (Array[In | Call] word) -> Array[In | Call | Return]
def expand(word)
  Automaton::ProcUtil.expand(@return_input, word, @proc_to_terminating_sequence)
end
95
+
96
# Projects `word` via `Automaton::ProcUtil.project`, passing this manager's
# call alphabet set and return symbol.
#
#: [In, Call, Return] (Array[In | Call] word) -> Array[In | Call | Return]
def project(word)
  Automaton::ProcUtil.project(@call_alphabet_set, @return_input, word)
end
100
+
101
# Delegates to `Automaton::ProcUtil.find_call_index` with this manager's call
# alphabet set and return symbol.
# NOTE(review): presumably returns the position of the call matching the
# context at `index` - confirm against ProcUtil.
#
#: (Array[In | Call | Return] word, Integer index) -> Integer
def find_call_index(word, index) # steep:ignore
  Automaton::ProcUtil.find_call_index(@call_alphabet_set, @return_input, word, index)
end
105
+
106
# Delegates to `Automaton::ProcUtil.find_return_index` with this manager's
# call alphabet set and return symbol.
# NOTE(review): presumably returns the position of the return matching the
# context starting at `index` - confirm against ProcUtil.
#
#: (Array[In | Call | Return] word, Integer index) -> Integer
def find_return_index(word, index) # steep:ignore
  Automaton::ProcUtil.find_return_index(@call_alphabet_set, @return_input, word, index)
end
110
+
111
+ private
112
+
113
# Records, for every call symbol in `cex`, the body between that call and the
# return index reported by `find_return_index` as a terminating sequence.
#
# A body is stored when the procedure has no terminating sequence yet or when
# it is strictly shorter than the stored one. Returns the call symbols that
# had no terminating sequence before, in order of appearance.
#
#: (Array[In | Call | Return] cex) -> Array[Call]
def extract_potential_terminating_sequences(cex)
  cex.each_with_index.with_object([]) do |(input, index), new_procs|
    next unless @call_alphabet_set.include?(input) # steep:ignore

    return_index = find_return_index(cex, index + 1)
    candidate = cex[(index + 1)...return_index]
    known = @proc_to_terminating_sequence[input] # steep:ignore

    new_procs << input if known.nil?
    if known.nil? || candidate.size < known.size # steep:ignore
      @proc_to_terminating_sequence[input] = candidate # steep:ignore
    end
  end
end
132
+
133
# Records access and return sequences for every call symbol found in `cex`.
#
# Two working words are maintained while walking `cex` from left to right:
#
# - `access_sequence` is the part of `cex` read so far, where every call that
#   has already returned has its body replaced by the stored terminating
#   sequence of that procedure.
# - `return_sequence` is a minified form of the part of `cex` not yet read,
#   starting with the current symbol.
#
# At each call symbol, the pair (access sequence up to and including the
# call, return sequence from the matching return onwards) is stored when the
# procedure has no pair yet or when the new pair is strictly shorter in total.
#
#: (Array[In | Call | Return] cex) -> void
def extract_potential_access_and_return_sequences(cex)
  access_sequence = []
  return_sequence = minify_well_matched(cex)

  cex.each_with_index do |input, index|
    access_sequence << input

    if @call_alphabet_set.include?(input) # steep:ignore
      # `return_sequence[0]` is the current call, so its matching return is
      # searched starting right after it.
      return_index = find_return_index(return_sequence, 1)
      potential_return_sequence = return_sequence[return_index...]
      current_access_sequence = @proc_to_access_sequence[input] # steep:ignore
      current_return_sequence = @proc_to_return_sequence[input] # steep:ignore
      if current_access_sequence.nil? || current_return_sequence.nil? ||
           (access_sequence.size + potential_return_sequence.size) < # steep:ignore
             (current_access_sequence.size + current_return_sequence.size) # steep:ignore
        @proc_to_access_sequence[input] = access_sequence.dup # steep:ignore
        @proc_to_return_sequence[input] = potential_return_sequence # steep:ignore
      end
    elsif input == @return_input # steep:ignore
      # The call that this return closes is finished: replace its body with
      # the stored terminating sequence, keeping it between the call symbol
      # and the return symbol. (Prepending it to the whole access sequence
      # would corrupt the access sequences of all later calls.)
      call_index = find_call_index(access_sequence, access_sequence.size - 1)
      proc = access_sequence[call_index]
      access_sequence.slice!(call_index + 1...access_sequence.size - 1)
      access_sequence.insert(call_index + 1, *@proc_to_terminating_sequence[proc])
    end

    # Drop the symbol that was just consumed.
    return_sequence.shift

    next unless @call_alphabet_set.include?(input) # steep:ignore

    # We are now inside the call: swap the minified body at the front of
    # `return_sequence` for the actual body from `cex`, with its nested calls
    # minified, so the following symbols line up with `cex` again.
    rs_return_index = find_return_index(return_sequence, 0)
    cex_return_index = find_return_index(cex, index + 1)
    return_sequence.slice!(0...rs_return_index)
    return_sequence.unshift(*minify_well_matched(cex[index + 1...cex_return_index])) # steep:ignore
  end
end
169
+
170
# Returns a copy of `word` in which the body of every top-level call that has
# a return index is replaced by the stored terminating sequence of the called
# procedure. Calls for which `find_return_index` yields no index are copied
# as they are and scanning continues with the next symbol.
#
#: (Array[In | Call | Return] word) -> Array[In | Call | Return]
def minify_well_matched(word)
  minified_word = []
  position = 0
  until position >= word.size
    symbol = word[position]
    minified_word << symbol

    is_call = @call_alphabet_set.include?(symbol) # steep:ignore
    return_index = is_call ? find_return_index(word, position + 1) : nil
    if return_index
      minified_word.concat(@proc_to_terminating_sequence[symbol]) # steep:ignore
      minified_word << @return_input
      # Jump to the return; the increment below steps past it.
      position = return_index
    end

    position += 1
  end
  minified_word
end
189
+
190
# Replaces each sequence in `proc_to_sequence` by its minified form when the
# minified form is strictly shorter. The hash is updated in place.
#
#: (Hash[Call, Array[In | Call | Return]]) -> void
def optimize_sequences(proc_to_sequence)
  proc_to_sequence.each_key do |proc|
    sequence = proc_to_sequence[proc]
    minified_sequence = minify_well_matched(sequence)
    next unless minified_sequence.size < sequence.size

    proc_to_sequence[proc] = minified_sequence
  end
end
197
+ end
198
+ end
199
+ end
200
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+ # rbs_inline: enabled
3
+
4
+ module Lernen
5
+ module Algorithm
6
+ module Procedural
7
+ # ProceduralLearner is an implementation of the learning algorithm for SPA.
8
+ #
9
+ # This algorithm is described in [Frohme & Steffen (2021) "Compositional
10
+ # Learning of Mutually Recursive Procedural Systems"](https://link.springer.com/article/10.1007/s10009-021-00634-y).
11
+ #
12
+ # @rbs generic In -- Type for input alphabet
13
+ # @rbs generic Call -- Type for call alphabet
14
+ # @rbs generic Return -- Type for return alphabet
15
+ class ProceduralLearner < Learner #[In | Call | Return, bool]
16
+ # @rbs @alphabet: Array[In]
17
+ # @rbs @call_alphabet: Array[Call]
18
+ # @rbs @return_input: Return
19
+ # @rbs @sul: System::SUL[In | Call | Return, bool]
20
+ # @rbs @algorithm: :lstar | :kearns_vazirani | :lsharp
21
+ # @rbs @algorithm_params: Hash[Symbol, untyped]
22
+ # @rbs @cex_processing: cex_processing_method
23
+
24
+ # @rbs @initial_proc: Call | nil
25
+ # @rbs @proc_to_learner: Hash[Call, Learner[In | Call, bool]]
26
+ # @rbs @manager: ATRManager[In, Call, Return]
27
+ # @rbs @active_call_alphabet_set: Set[Call]
28
+
29
# Creates a learner with no known procedures.
#
# `algorithm` selects the learner class used for each procedure and
# `algorithm_params` is forwarded to that learner's constructor as keyword
# arguments. The learner keeps its own copies of `alphabet` and
# `call_alphabet`.
#
#: (
#    Array[In] alphabet,
#    Array[Call] call_alphabet,
#    Return return_input,
#    System::SUL[In | Call | Return, bool] sul,
#    ?algorithm: :lstar | :kearns_vazirani | :lsharp,
#    ?algorithm_params: Hash[Symbol, untyped],
#    ?cex_processing: cex_processing_method,
#    ?scan_procs: bool
#  ) -> void
def initialize(
  alphabet,
  call_alphabet,
  return_input,
  sul,
  algorithm: :kearns_vazirani,
  algorithm_params: {},
  cex_processing: :binary,
  scan_procs: true
)
  super()

  # Inputs describing the system under learning.
  @alphabet = alphabet.dup
  @call_alphabet = call_alphabet.dup
  @return_input = return_input
  @sul = sul

  # Configuration of the per-procedure learners.
  @algorithm = algorithm
  @algorithm_params = algorithm_params
  @cex_processing = cex_processing

  # Learning state; filled in once a positive counterexample is seen.
  @initial_proc = nil
  @proc_to_learner = {}
  @manager = ATRManager.new(alphabet, call_alphabet, return_input, scan_procs:)
  @active_call_alphabet_set = Set.new
end
64
+
65
# Builds the current SPA hypothesis from the hypotheses of all procedure
# learners.
#
# While no initial procedure is known, the first hypothesis and an empty
# mapping are returned. Otherwise returns the SPA together with, for each
# procedure, the state-to-prefix mapping reported by its learner.
#
#: () -> [Automaton::SPA[In, Call, Return], Hash[Call, Hash[Integer, Array[In | Call]]]]
def build_hypothesis
  initial_proc = @initial_proc
  return build_first_hypothesis, {} unless initial_proc

  proc_to_dfa = {}
  proc_to_state_to_prefix = {}
  @proc_to_learner.each do |proc, learner|
    proc_to_dfa[proc], proc_to_state_to_prefix[proc] = learner.build_hypothesis
  end

  [Automaton::SPA.new(initial_proc, @return_input, proc_to_dfa), proc_to_state_to_prefix]
end
81
+
82
# Refines the procedure learners using the counterexample `cex`.
#
# Information is first extracted from `cex`; then refinement rounds are run
# until one reports that nothing was refined. The hypothesis arguments are
# ignored, because a fresh hypothesis is built in every round.
#
#: (
#    Array[In | Call | Return] cex,
#    Automaton::SPA[In, Call, Return] _hypothesis,
#    Hash[Call, Hash[Integer, Array[In | Call]]] _proc_to_state_to_prefix
#  ) -> void
def refine_hypothesis(cex, _hypothesis, _proc_to_state_to_prefix)
  extract_useful_information_from_cex(cex)

  loop do
    refined = refine_hypothesis_internal(cex)
    break unless refined
  end
end
92
+
93
+ private
94
+
95
# Builds the hypothesis used before any procedure is known: an SPA with no
# initial procedure and no procedure DFAs.
#
#: () -> Automaton::SPA[In, Call, Return]
def build_first_hypothesis # steep:ignore
  no_procs = {}
  Automaton::SPA.new(nil, @return_input, no_procs)
end
99
+
100
# Extracts information from `cex` when the SUL's answer for it is truthy.
#
# The first symbol of `cex` becomes the initial procedure and the manager
# scans `cex` for sequences. For every newly discovered procedure a learner
# of the configured algorithm is created. Each new learner is told about all
# already known procedures, and every learner (the new one included) is told
# about the new procedure. Finally the manager scans the resulting hypothesis.
#
#: (Array[In | Call | Return] cex) -> void
def extract_useful_information_from_cex(cex)
  return unless @sul.query_last(cex)

  @initial_proc = cex[0]

  new_procs = @manager.scan_positive_cex(cex)
  return if new_procs.empty?

  new_procs.each do |new_proc|
    proc_sul = ProceduralSUL.new(new_proc, @sul, @manager)
    learner_class =
      case @algorithm
      in :lstar
        LStar::LStarLearner
      in :kearns_vazirani
        KearnsVazirani::KearnsVaziraniLearner
      in :lsharp
        LSharp::LSharpLearner
      end
    new_learner = learner_class.new(@alphabet, proc_sul, automaton_type: :dfa, **@algorithm_params)

    @proc_to_learner.each_key { |known_proc| new_learner.add_alphabet(known_proc) }
    @proc_to_learner[new_proc] = new_learner
    @proc_to_learner.each_value { |learner| learner.add_alphabet(new_proc) }
    @active_call_alphabet_set << new_proc
  end

  hypothesis, proc_to_state_to_prefix = build_hypothesis
  @manager.scan_procs(hypothesis.proc_to_dfa, proc_to_state_to_prefix)
end
135
+
136
# Runs one refinement round for `cex`.
#
# Returns `false` when the hypothesis already agrees with the SUL on `cex`,
# either immediately or after terminating-sequence conformance has been
# ensured. Otherwise the return position selected by `CexProcessor.process`
# determines a procedure call; the projected body of that call is passed to
# the procedure's learner as a local counterexample, and `true` is returned.
#
#: (Array[In | Call | Return] cex) -> bool
def refine_hypothesis_internal(cex)
  sul_out = @sul.query_last(cex)

  first_hypothesis = build_hypothesis[0]
  return false if first_hypothesis.run(cex)[0].last == sul_out

  update_atr_and_check_ts_conformance
  hypothesis, proc_to_state_to_prefix = build_hypothesis
  return false if hypothesis.run(cex)[0].last == sul_out

  return_indices = cex.each_index.select { |index| cex[index] == @return_input } # steep:ignore
  # The global query uses the hypothesis when the SUL's answer is truthy,
  # and the SUL otherwise.
  global_query = sul_out ? ->(word) { hypothesis.run(word)[0].last } : ->(word) { @sul.query_last(word) }
  global_acex = ReturnIndicesAcex.new(cex, return_indices, global_query, @manager) # steep:ignore
  idx = CexProcessor.process(global_acex, cex_processing: @cex_processing)

  return_index = return_indices[idx]
  call_index = @manager.find_call_index(cex, return_index)
  proc = cex[call_index]

  local_cex = @manager.project(cex[(call_index + 1)...return_index]) # steep:ignore
  @proc_to_learner[proc].refine_hypothesis( # steep:ignore
    local_cex,
    hypothesis.proc_to_dfa[proc], # steep:ignore
    proc_to_state_to_prefix[proc] # steep:ignore
  )

  true
end
168
+
169
# Repeats the terminating-sequence conformance check until it reports no
# refinement. After each reported refinement the hypothesis is rebuilt and
# scanned by the manager. Returns whether any refinement was reported.
#
#: () -> bool
def update_atr_and_check_ts_conformance
  updated = false

  hypothesis, proc_to_state_to_prefix = build_hypothesis
  loop do
    break unless check_and_ensure_ts_conformance(hypothesis, proc_to_state_to_prefix)

    updated = true
    hypothesis, proc_to_state_to_prefix = build_hypothesis
    @manager.scan_procs(hypothesis.proc_to_dfa, proc_to_state_to_prefix)
  end

  updated
end
182
+
183
# Checks, procedure by procedure, the word made of the call symbol, the
# stored terminating sequence, and the return symbol. Stops at the first
# procedure whose check reports a refinement and returns `true`; returns
# `false` when no check reports one.
def check_and_ensure_ts_conformance(hypothesis, proc_to_state_to_prefix)
  hypothesis.proc_to_dfa.each_key.any? do |proc|
    ts = [proc].concat(@manager.proc_to_terminating_sequence[proc]).push(@return_input)
    check_and_ensure_single_ts_conformance(ts, hypothesis, proc_to_state_to_prefix)
  end
end
199
+
200
# Checks every active call inside `ts`: the projected body of the call must
# lead the called procedure's DFA to a state whose output is truthy. When it
# does not, the body is passed to that procedure's learner as a
# counterexample. All calls in `ts` are checked; returns whether any learner
# was refined.
def check_and_ensure_single_ts_conformance(ts, hypothesis, proc_to_state_to_prefix) # rubocop:disable Naming/MethodParameterName
  outcomes = ts.each_with_index.map do |input, index|
    next false unless @active_call_alphabet_set.include?(input)

    return_index = @manager.find_return_index(ts, index + 1)
    local_word = @manager.project(ts[(index + 1)...return_index])

    dfa = hypothesis.proc_to_dfa[input]
    next false if dfa.output(dfa.run(local_word)[1])

    @proc_to_learner[input].refine_hypothesis(local_word, dfa, proc_to_state_to_prefix[input])
    true
  end

  outcomes.any?
end
220
+ end
221
+ end
222
+ end
223
+ end