RubyGems - lernen - Versions diffs - 0.1.0 → 0.3.0 - Mend

lernen 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (142) hide show

checksums.yaml +4 -4
data/.rubocop.yml +18 -0
data/README.md +531 -28
data/Rakefile +29 -7
data/Steepfile +14 -0
data/examples/ripper_prism.rb +63 -0
data/examples/uri_parse_regexp.rb +73 -0
data/lib/lernen/algorithm/cex_processor/acex.rb +43 -0
data/lib/lernen/algorithm/cex_processor/prefix_transformer_acex.rb +43 -0
data/lib/lernen/algorithm/cex_processor.rb +115 -0
data/lib/lernen/algorithm/kearns_vazirani/discrimination_tree.rb +207 -0
data/lib/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rb +100 -0
data/lib/lernen/algorithm/kearns_vazirani.rb +44 -0
data/lib/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rb +246 -0
data/lib/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rb +89 -0
data/lib/lernen/algorithm/kearns_vazirani_vpa.rb +35 -0
data/lib/lernen/algorithm/learner.rb +82 -0
data/lib/lernen/algorithm/lsharp/lsharp_learner.rb +367 -0
data/lib/lernen/algorithm/lsharp/observation_tree.rb +115 -0
data/lib/lernen/algorithm/lsharp.rb +43 -0
data/lib/lernen/algorithm/lstar/lstar_learner.rb +49 -0
data/lib/lernen/algorithm/lstar/observation_table.rb +214 -0
data/lib/lernen/algorithm/lstar.rb +49 -0
data/lib/lernen/algorithm/procedural/atr_manager.rb +200 -0
data/lib/lernen/algorithm/procedural/procedural_learner.rb +223 -0
data/lib/lernen/algorithm/procedural/procedural_sul.rb +47 -0
data/lib/lernen/algorithm/procedural/return_indices_acex.rb +58 -0
data/lib/lernen/algorithm/procedural.rb +57 -0
data/lib/lernen/algorithm.rb +19 -0
data/lib/lernen/automaton/dfa.rb +204 -0
data/lib/lernen/automaton/mealy.rb +108 -0
data/lib/lernen/automaton/moore.rb +122 -0
data/lib/lernen/automaton/moore_like.rb +83 -0
data/lib/lernen/automaton/proc_util.rb +93 -0
data/lib/lernen/automaton/spa.rb +368 -0
data/lib/lernen/automaton/transition_system.rb +209 -0
data/lib/lernen/automaton/vpa.rb +300 -0
data/lib/lernen/automaton.rb +19 -92
data/lib/lernen/equiv/combined_oracle.rb +57 -0
data/lib/lernen/equiv/exhaustive_search_oracle.rb +60 -0
data/lib/lernen/equiv/moore_like_simulator_oracle.rb +36 -0
data/lib/lernen/equiv/oracle.rb +109 -0
data/lib/lernen/equiv/random_walk_oracle.rb +69 -0
data/lib/lernen/equiv/random_well_matched_word_oracle.rb +139 -0
data/lib/lernen/equiv/random_word_oracle.rb +71 -0
data/lib/lernen/equiv/spa_simulator_oracle.rb +39 -0
data/lib/lernen/equiv/test_words_oracle.rb +42 -0
data/lib/lernen/equiv/transition_system_simulator_oracle.rb +36 -0
data/lib/lernen/equiv/vpa_simulator_oracle.rb +48 -0
data/lib/lernen/equiv.rb +25 -0
data/lib/lernen/graph.rb +215 -0
data/lib/lernen/system/block_sul.rb +41 -0
data/lib/lernen/system/moore_like_simulator.rb +45 -0
data/lib/lernen/system/moore_like_sul.rb +33 -0
data/lib/lernen/system/sul.rb +126 -0
data/lib/lernen/system/transition_system_simulator.rb +40 -0
data/lib/lernen/system.rb +72 -0
data/lib/lernen/version.rb +2 -1
data/lib/lernen.rb +322 -13
data/rbs_collection.lock.yaml +16 -0
data/rbs_collection.yaml +14 -0
data/renovate.json +6 -0
data/sig/generated/lernen/algorithm/cex_processor/acex.rbs +30 -0
data/sig/generated/lernen/algorithm/cex_processor/prefix_transformer_acex.rbs +27 -0
data/sig/generated/lernen/algorithm/cex_processor.rbs +59 -0
data/sig/generated/lernen/algorithm/kearns_vazirani/discrimination_tree.rbs +68 -0
data/sig/generated/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rbs +51 -0
data/sig/generated/lernen/algorithm/kearns_vazirani.rbs +32 -0
data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rbs +73 -0
data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rbs +51 -0
data/sig/generated/lernen/algorithm/kearns_vazirani_vpa.rbs +20 -0
data/sig/generated/lernen/algorithm/learner.rbs +53 -0
data/sig/generated/lernen/algorithm/lsharp/lsharp_learner.rbs +103 -0
data/sig/generated/lernen/algorithm/lsharp/observation_tree.rbs +53 -0
data/sig/generated/lernen/algorithm/lsharp.rbs +38 -0
data/sig/generated/lernen/algorithm/lstar/lstar_learner.rbs +38 -0
data/sig/generated/lernen/algorithm/lstar/observation_table.rbs +79 -0
data/sig/generated/lernen/algorithm/lstar.rbs +37 -0
data/sig/generated/lernen/algorithm/procedural/atr_manager.rbs +80 -0
data/sig/generated/lernen/algorithm/procedural/procedural_learner.rbs +79 -0
data/sig/generated/lernen/algorithm/procedural/procedural_sul.rbs +36 -0
data/sig/generated/lernen/algorithm/procedural/return_indices_acex.rbs +33 -0
data/sig/generated/lernen/algorithm/procedural.rbs +27 -0
data/sig/generated/lernen/algorithm.rbs +10 -0
data/sig/generated/lernen/automaton/dfa.rbs +93 -0
data/sig/generated/lernen/automaton/mealy.rbs +61 -0
data/sig/generated/lernen/automaton/moore.rbs +69 -0
data/sig/generated/lernen/automaton/moore_like.rbs +63 -0
data/sig/generated/lernen/automaton/proc_util.rbs +38 -0
data/sig/generated/lernen/automaton/spa.rbs +125 -0
data/sig/generated/lernen/automaton/transition_system.rbs +108 -0
data/sig/generated/lernen/automaton/vpa.rbs +109 -0
data/sig/generated/lernen/automaton.rbs +15 -0
data/sig/generated/lernen/equiv/combined_oracle.rbs +27 -0
data/sig/generated/lernen/equiv/exhaustive_search_oracle.rbs +38 -0
data/sig/generated/lernen/equiv/moore_like_simulator_oracle.rbs +27 -0
data/sig/generated/lernen/equiv/oracle.rbs +75 -0
data/sig/generated/lernen/equiv/random_walk_oracle.rbs +41 -0
data/sig/generated/lernen/equiv/random_well_matched_word_oracle.rbs +70 -0
data/sig/generated/lernen/equiv/random_word_oracle.rbs +45 -0
data/sig/generated/lernen/equiv/spa_simulator_oracle.rbs +30 -0
data/sig/generated/lernen/equiv/test_words_oracle.rbs +20 -0
data/sig/generated/lernen/equiv/transition_system_simulator_oracle.rbs +27 -0
data/sig/generated/lernen/equiv/vpa_simulator_oracle.rbs +33 -0
data/sig/generated/lernen/equiv.rbs +11 -0
data/sig/generated/lernen/graph.rbs +80 -0
data/sig/generated/lernen/system/block_sul.rbs +29 -0
data/sig/generated/lernen/system/moore_like_simulator.rbs +31 -0
data/sig/generated/lernen/system/moore_like_sul.rbs +28 -0
data/sig/generated/lernen/system/sul.rbs +87 -0
data/sig/generated/lernen/system/transition_system_simulator.rbs +28 -0
data/sig/generated/lernen/system.rbs +62 -0
data/sig/generated/lernen/version.rbs +6 -0
data/sig/generated/lernen.rbs +214 -0
data/sig-test/generated/test/example_test.rbs +14 -0
data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_test.rbs +16 -0
data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_vpa_test.rbs +10 -0
data/sig-test/generated/test/lernen/algorithm/lsharp_test.rbs +16 -0
data/sig-test/generated/test/lernen/algorithm/lstar_test.rbs +16 -0
data/sig-test/generated/test/lernen/algorithm/procedural_test.rbs +10 -0
data/sig-test/generated/test/lernen/automaton/dfa_test.rbs +19 -0
data/sig-test/generated/test/lernen/automaton/mealy_test.rbs +19 -0
data/sig-test/generated/test/lernen/automaton/moore_test.rbs +19 -0
data/sig-test/generated/test/lernen/automaton/proc_util_test.rbs +19 -0
data/sig-test/generated/test/lernen/automaton/spa_test.rbs +19 -0
data/sig-test/generated/test/lernen/automaton/vpa_test.rbs +19 -0
data/sig-test/generated/test/lernen/equiv/exhaustive_search_oracle_test.rbs +10 -0
data/sig-test/generated/test/lernen/equiv/random_walk_oracle_test.rbs +10 -0
data/sig-test/generated/test/lernen/equiv/random_word_oracle_test.rbs +10 -0
data/sig-test/generated/test/lernen/system/block_sul_test.rbs +16 -0
data/sig-test/generated/test/lernen/system/moore_like_simulator_test.rbs +16 -0
data/sig-test/generated/test/lernen/system/transition_system_simulator_test.rbs +13 -0
data/sig-test/generated/test/lernen/system_test.rbs +11 -0
data/sig-test/generated/test/lernen_test.rbs +13 -0
metadata +131 -11
data/.yardopts +0 -3
data/lib/lernen/cex_processor.rb +0 -61
data/lib/lernen/kearns_vazirani.rb +0 -199
data/lib/lernen/lsharp.rb +0 -335
data/lib/lernen/lstar.rb +0 -169
data/lib/lernen/oracle.rb +0 -116
data/lib/lernen/sul.rb +0 -134

data/lib/lernen/algorithm/lstar/observation_table.rb ADDED Viewed

@@ -0,0 +1,214 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module Lernen
+  module Algorithm
+    module LStar
+      # ObservationTable is an implementation of the observation table data structure.
+      #
+      # This data structure is used for Angluin's L* algorithm.
+      #
+      # @rbs generic In  -- Type for input alphabet
+      # @rbs generic Out -- Type for output values
+      class ObservationTable
+        # @rbs @alphabet: Array[In]
+        # @rbs @sul: System::SUL[In, Out]
+        # @rbs @automaton_type: Automaton::transition_system_type
+        # @rbs @cex_processing: cex_processing_method | nil
+        # @rbs @prefixes: Array[Array[In]]
+        # @rbs @suffixes: Array[Array[In]]
+        # @rbs @table: Hash[Array[In], Array[Out]]
+        #: (
+        #    Array[In] alphabet,
+        #    System::SUL[In, Out] sul,
+        #    automaton_type: :dfa | :moore | :mealy,
+        #    cex_processing: cex_processing_method | nil
+        #  ) -> void
+        def initialize(alphabet, sul, automaton_type:, cex_processing:)
+          @alphabet = alphabet
+          @sul = sul
+          @automaton_type = automaton_type
+          @cex_processing = cex_processing
+          @prefixes = [[]]
+          @suffixes = []
+          @table = {}
+          case @automaton_type
+          in :dfa | :moore
+            @suffixes << []
+          in :mealy
+            @alphabet.each { |a| @suffixes << [a] }
+          end
+        end
+        # Constructs a hypothesis automaton from this observation table.
+        #
+        #: () -> [Automaton::TransitionSystem[Integer, In, Out], Hash[Integer, Array[In]]]
+        def build_hypothesis
+          make_consistent_and_closed
+          state_to_prefix = @prefixes.each_with_index.to_h { |prefix, state| [state, prefix] }
+          row_to_state = @prefixes.each_with_index.to_h { |prefix, state| [@table[prefix], state] }
+          transition_function = {}
+          @prefixes.each_with_index do |prefix, state|
+            @alphabet.each_with_index do |input, index|
+              case @automaton_type
+              in :moore | :dfa
+                transition_function[[state, input]] = row_to_state[@table[prefix + [input]]]
+              in :mealy
+                transition_function[[state, input]] = [@table[prefix][index], row_to_state[@table[prefix + [input]]]]
+              end
+            end
+          end
+          automaton =
+            case @automaton_type
+            in :dfa
+              accept_state_set =
+                state_to_prefix.to_a.filter { |(_, prefix)| @table[prefix][0] }.to_set { |(state, _)| state }
+              Automaton::DFA.new(0, accept_state_set, transition_function)
+            in :moore
+              outputs = state_to_prefix.transform_values { |prefix| @table[prefix][0] }
+              Automaton::Moore.new(0, outputs, transition_function)
+            in :mealy
+              Automaton::Mealy.new(0, transition_function)
+            end
+          [automaton, state_to_prefix]
+        end
+        # Updates this observation table by the given `cex`.
+        #
+        # With a counterexample processing method, `CexProcessor.process` picks an index `n`
+        # in `cex`. The hypothesis is run on `cex[0...n]`, and the prefix of the reached state
+        # extended by `cex[n]` is added as a prefix while `cex[n + 1...]` is added as a suffix.
+        # Without one (`nil`), every prefix of `cex`, from the empty word up to `cex` itself,
+        # is added to the prefix list. Entries that are already present are not duplicated.
+        #
+        #: (
+        #    Array[In] cex,
+        #    Automaton::TransitionSystem[Integer, In, Out] hypothesis,
+        #    Hash[Integer, Array[In]] state_to_prefix
+        #  ) -> void
+        def refine_hypothesis(cex, hypothesis, state_to_prefix)
+          cex_processing = @cex_processing
+          if cex_processing
+            state_to_prefix_lambda = ->(state) { state_to_prefix[state] }
+            acex = CexProcessor::PrefixTransformerAcex.new(cex, @sul, hypothesis, state_to_prefix_lambda)
+            n = CexProcessor.process(acex, cex_processing:)
+            # Split the counterexample around position `n`.
+            old_prefix = cex[0...n]
+            new_input = cex[n]
+            new_suffix = cex[n + 1...]
+            # The second value returned by `hypothesis.run` is used as the reached state.
+            _, old_state = hypothesis.run(old_prefix) # steep:ignore
+            new_prefix = state_to_prefix[old_state] + [new_input]
+            @prefixes << new_prefix unless @prefixes.include?(new_prefix)
+            @suffixes << new_suffix unless @suffixes.include?(new_suffix) # steep:ignore
+          else
+            cex_prefixes = (0..cex.size).map { |n| cex[0...n] }
+            cex_prefixes.each do |prefix|
+              @prefixes << prefix unless @prefixes.include?(prefix) # steep:ignore
+            end
+          end
+        end
+        private
+        # Finds new prefixes to close.
+        #
+        #: () -> (Array[Array[In]] | nil)
+        def find_prefixes_to_close
+          prefixes_to_close = []
+          unclosed_row_set = Set.new
+          prefix_row_set = @prefixes.to_set { |prefix| @table[prefix] }
+          @prefixes.each do |prefix|
+            @alphabet.each do |input|
+              new_prefix = prefix + [input]
+              row = @table[new_prefix]
+              unless prefix_row_set.include?(row) || unclosed_row_set.include?(row)
+                prefixes_to_close << new_prefix
+                unclosed_row_set << row
+              end
+            end
+          end
+          return if prefixes_to_close.empty?
+          prefixes_to_close.sort_by!(&:size).reverse!
+        end
+        # Checks consistency and returns a new suffix to add if this observation table
+        # is inconsistent.
+        #
+        # Every pair of prefixes with equal rows is examined. When appending the same input
+        # to both makes their rows differ, the input followed by the first suffix whose
+        # column differs is returned. Returns `nil` when no such pair is found.
+        #
+        #: () -> (Array[In] | nil)
+        def check_consistency
+          @prefixes.combination(2) do |(prefix1, prefix2)|
+            # Only prefixes that currently look like the same state are of interest.
+            next unless @table[prefix1] == @table[prefix2] # steep:ignore
+            @alphabet.each do |input|
+              new_prefix1 = prefix1 + [input] # steep:ignore
+              new_prefix2 = prefix2 + [input] # steep:ignore
+              next if @table[new_prefix1] == @table[new_prefix2]
+              # The extended rows differ: locate the first column that tells them apart.
+              @suffixes.each_with_index do |suffix, index|
+                next if @table[new_prefix1][index] == @table[new_prefix2][index] # steep:ignore
+                return [input] + suffix
+              end
+            end
+          end
+          nil
+        end
+        # Updates rows of this observation table.
+        #
+        #: () -> void
+        def update_table
+          @prefixes.each do |prefix|
+            update_table_row(prefix)
+            @alphabet.each { |input| update_table_row(prefix + [input]) }
+          end
+        end
+        # Updates the row for the given `prefix` of this observation table.
+        #
+        #: (Array[In] prefix) -> void
+        def update_table_row(prefix)
+          @table[prefix] ||= []
+          return if @table[prefix].size == @suffixes.size
+          @suffixes[@table[prefix].size..].each do |suffix| # steep:ignore
+            word = prefix + suffix
+            output = word.empty? && (sul = @sul).is_a?(System::MooreLikeSUL) ? sul.query_empty : @sul.query_last(word)
+            @table[prefix] << output
+          end
+        end
+        # Update this table to be consistent and closed.
+        #
+        #: () -> void
+        def make_consistent_and_closed
+          update_table
+          if @cex_processing.nil?
+            new_suffix = check_consistency
+            until new_suffix.nil?
+              @suffixes << new_suffix
+              update_table
+              new_suffix = check_consistency
+            end
+          end
+          new_prefixes = find_prefixes_to_close
+          until new_prefixes.nil?
+            @prefixes.push(*new_prefixes)
+            update_table
+            new_prefixes = find_prefixes_to_close
+          end
+        end
+      end
+    end
+  end
+end

data/lib/lernen/algorithm/lstar.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+require "lernen/algorithm/lstar/observation_table"
+require "lernen/algorithm/lstar/lstar_learner"
+module Lernen
+  module Algorithm
+    # LStar provides an implementation of Angluin's L* algorithm.
+    #
+    # Angluin's L* is introduced by [Angluin (1987) "Learning Regular Sets from
+    # Queries and Counterexamples"](https://dl.acm.org/doi/10.1016/0890-5401%2887%2990052-6).
+    module LStar
+      # Runs Angluin's L* algorithm and returns an inferred automaton.
+      #
+      # `cex_processing` is used for determining a method of counterexample processing.
+      # In addition to the predefined `cex_processing_method` values, `nil` can be specified as
+      # `cex_processing`. When `cex_processing: nil` is specified, it uses the original
+      # counterexample processing described in the Angluin paper. The default is `:binary`.
+      #
+      # The work is delegated to an `LStarLearner` built from `alphabet`, `sul`,
+      # `automaton_type` and `cex_processing`; `oracle` and `max_learning_rounds` are
+      # passed on to its `learn` method.
+      #
+      #: [In] (
+      #    Array[In] alphabet, System::SUL[In, bool] sul, Equiv::Oracle[In, bool] oracle,
+      #    automaton_type: :dfa,
+      #    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
+      #  ) -> Automaton::DFA[In]
+      #: [In, Out] (
+      #    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
+      #    automaton_type: :mealy,
+      #    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
+      #  ) -> Automaton::Mealy[In, Out]
+      #: [In, Out] (
+      #    Array[In] alphabet, System::SUL[In, Out] sul, Equiv::Oracle[In, Out] oracle,
+      #    automaton_type: :moore,
+      #    ?cex_processing: cex_processing_method | nil, ?max_learning_rounds: Integer | nil
+      #  ) -> Automaton::Moore[In, Out]
+      def self.learn( # steep:ignore
+        alphabet,
+        sul,
+        oracle,
+        automaton_type:,
+        cex_processing: :binary,
+        max_learning_rounds: nil
+      )
+        learner = LStarLearner.new(alphabet, sul, automaton_type:, cex_processing:)
+        learner.learn(oracle, max_learning_rounds:)
+      end
+    end
+  end
+end

data/lib/lernen/algorithm/procedural/atr_manager.rb ADDED Viewed

@@ -0,0 +1,200 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module Lernen
+  module Algorithm
+    module Procedural
+      # ATRManager is a collection to manage access, terminating, and return sequences.
+      #
+      # @rbs generic In     -- Type for input alphabet
+      # @rbs generic Call   -- Type for call alphabet
+      # @rbs generic Return -- Type for return alphabet
+      class ATRManager
+        # @rbs @alphabet: Array[In]
+        # @rbs @call_alphabet_set: Set[Call]
+        # @rbs @return_input: Return
+        # @rbs @scan_procs: bool
+        # @rbs @proc_to_access_sequence: Hash[Call, Array[In | Call | Return]]
+        # @rbs @proc_to_terminating_sequence: Hash[Call, Array[In | Call | Return]]
+        # @rbs @proc_to_return_sequence: Hash[Call, Array[In | Call | Return]]
+        # Creates a manager with no recorded access, terminating, or return sequences.
+        #
+        # `call_alphabet` is kept as a set for membership checks. `scan_procs` decides
+        # whether `#scan_procs` does any work; when it is false that method returns immediately.
+        #
+        #: (
+        #    Array[In] alphabet,
+        #    Array[Call] call_alphabet,
+        #    Return return_input,
+        #    ?scan_procs: bool
+        #  ) -> void
+        def initialize(alphabet, call_alphabet, return_input, scan_procs: true)
+          @alphabet = alphabet
+          @call_alphabet_set = call_alphabet.to_set
+          @return_input = return_input
+          @scan_procs = scan_procs
+          @proc_to_access_sequence = {}
+          @proc_to_terminating_sequence = {}
+          @proc_to_return_sequence = {}
+        end
+        attr_reader :proc_to_access_sequence #: Hash[Call, Array[In | Call | Return]]
+        attr_reader :proc_to_terminating_sequence #: Hash[Call, Array[In | Call | Return]]
+        attr_reader :proc_to_return_sequence #: Hash[Call, Array[In | Call | Return]]
+        #: (Array[In | Call | Return] cex) -> Array[Call]
+        def scan_positive_cex(cex)
+          new_procs = extract_potential_terminating_sequences(cex)
+          extract_potential_access_and_return_sequences(cex)
+          new_procs
+        end
+        # Shortens the recorded terminating sequences using the given procedure hypotheses.
+        #
+        # Does nothing when the manager was created with `scan_procs: false`. For each
+        # procedure, the prefixes of its DFA's accepting states are expanded and the shortest
+        # expansion replaces the recorded terminating sequence if it is strictly shorter.
+        # Passes are repeated until one completes without a replacement. If anything was
+        # replaced, all three sequence tables are minified afterwards.
+        #
+        # NOTE(review): the signature comment below labels the first parameter `procs`
+        # while the method itself names it `proc_to_dfa`.
+        #
+        #: (
+        #    Hash[Call, Automaton::DFA[In | Call]] procs,
+        #    Hash[Call, Hash[Integer, Array[In | Call]]] proc_to_state_to_prefix
+        #  ) -> void
+        def scan_procs(proc_to_dfa, proc_to_state_to_prefix)
+          return unless @scan_procs
+          updated = false
+          stable = false
+          # `expand` reads the terminating sequences, so one replacement can make
+          # other expansions shorter; hence the loop until nothing changes.
+          until stable
+            stable = true
+            proc_to_dfa.each do |proc, dfa|
+              current_terminating_sequence = @proc_to_terminating_sequence[proc]
+              state_to_prefix = proc_to_state_to_prefix[proc]
+              hypothesis_terminating_sequence =
+                dfa.accept_state_set.to_a.map { |accept_state| expand(state_to_prefix[accept_state]) }.min_by(&:size)
+              # No accepting state means there is no candidate for this procedure.
+              next unless hypothesis_terminating_sequence
+              next if current_terminating_sequence.size <= hypothesis_terminating_sequence.size
+              updated = true
+              stable = false
+              @proc_to_terminating_sequence[proc] = hypothesis_terminating_sequence
+            end
+          end
+          return unless updated
+          optimize_sequences(@proc_to_terminating_sequence)
+          optimize_sequences(@proc_to_access_sequence)
+          optimize_sequences(@proc_to_return_sequence)
+        end
+        #: (Call proc, Array[In | Call] word) -> Array[In | Call | Return]
+        def embed(proc, word)
+          access_sequence = @proc_to_access_sequence[proc]
+          expanded_word = expand(word)
+          return_sequence = @proc_to_return_sequence[proc]
+          [*access_sequence, *expanded_word, *return_sequence]
+        end
+        # Expands `word` by delegating to `Automaton::ProcUtil.expand` with this manager's
+        # return symbol and its table of terminating sequences.
+        #
+        #: [In, Call, Return] (Array[In | Call] word) -> Array[In | Call | Return]
+        def expand(word)
+          Automaton::ProcUtil.expand(@return_input, word, @proc_to_terminating_sequence)
+        end
+        # Projects `word` by delegating to `Automaton::ProcUtil.project` with this manager's
+        # call alphabet set and return symbol.
+        #
+        #: [In, Call, Return] (Array[In | Call] word) -> Array[In | Call | Return]
+        def project(word)
+          Automaton::ProcUtil.project(@call_alphabet_set, @return_input, word)
+        end
+        # Delegates to `Automaton::ProcUtil.find_call_index` with this manager's call
+        # alphabet set and return symbol.
+        # Presumably yields the position of the call matching position `index` in `word`
+        # (the exact contract lives in `ProcUtil`; verify there).
+        #
+        #: (Array[In | Call | Return] word, Integer index) -> Integer
+        def find_call_index(word, index) # steep:ignore
+          Automaton::ProcUtil.find_call_index(@call_alphabet_set, @return_input, word, index)
+        end
+        # Delegates to `Automaton::ProcUtil.find_return_index` with this manager's call
+        # alphabet set and return symbol.
+        # Presumably yields the position of the return matching the scan started at `index`
+        # in `word` (the exact contract lives in `ProcUtil`; verify there).
+        #
+        #: (Array[In | Call | Return] word, Integer index) -> Integer
+        def find_return_index(word, index) # steep:ignore
+          Automaton::ProcUtil.find_return_index(@call_alphabet_set, @return_input, word, index)
+        end
+        private
+        # Scans `cex` for call symbols and records, for each of them, the part of `cex`
+        # between the call and the index reported by `find_return_index` as a candidate
+        # terminating sequence.
+        #
+        # A candidate is stored when the procedure has no terminating sequence yet or when
+        # it is strictly shorter than the recorded one. Returns the procedures that had no
+        # terminating sequence before this scan.
+        #
+        #: (Array[In | Call | Return] cex) -> Array[Call]
+        def extract_potential_terminating_sequences(cex)
+          new_procs = []
+          cex.each_with_index do |input, index|
+            next unless @call_alphabet_set.include?(input) # steep:ignore
+            return_index = find_return_index(cex, index + 1)
+            potential_terminating_sequence = cex[index + 1...return_index]
+            current_terminating_sequence = @proc_to_terminating_sequence[input] # steep:ignore
+            if current_terminating_sequence.nil?
+              # First time this procedure is seen.
+              new_procs << input
+              @proc_to_terminating_sequence[input] = potential_terminating_sequence # steep:ignore
+            elsif potential_terminating_sequence.size < current_terminating_sequence.size # steep:ignore
+              @proc_to_terminating_sequence[input] = potential_terminating_sequence # steep:ignore
+            end
+          end
+          new_procs
+        end
+        #: (Array[In | Call | Return] cex) -> void
+        def extract_potential_access_and_return_sequences(cex)
+          access_sequence = []
+          return_sequence = minify_well_matched(cex)
+          cex.each_with_index do |input, index|
+            access_sequence << input
+            if @call_alphabet_set.include?(input) # steep:ignore
+              return_index = find_return_index(return_sequence, 1)
+              potential_return_sequence = return_sequence[return_index...]
+              current_access_sequence = @proc_to_access_sequence[input] # steep:ignore
+              current_return_sequence = @proc_to_return_sequence[input] # steep:ignore
+              if current_access_sequence.nil? || current_return_sequence.nil? ||
+                   (access_sequence.size + potential_return_sequence.size) < # steep:ignore
+                     (current_access_sequence.size + current_return_sequence.size) # steep:ignore
+                @proc_to_access_sequence[input] = access_sequence.dup # steep:ignore
+                @proc_to_return_sequence[input] = potential_return_sequence # steep:ignore
+              end
+            elsif input == @return_input # steep:ignore
+              call_index = find_call_index(access_sequence, access_sequence.size - 1)
+              proc = access_sequence[call_index]
+              access_sequence.slice!(call_index + 1...access_sequence.size - 1)
+              access_sequence.unshift(*@proc_to_terminating_sequence[proc])
+            end
+            return_sequence.shift
+            next unless @call_alphabet_set.include?(input) # steep:ignore
+            rs_return_index = find_return_index(return_sequence, 0)
+            cex_return_index = find_return_index(cex, index + 1)
+            return_sequence.slice!(0...rs_return_index)
+            return_sequence.unshift(*minify_well_matched(cex[index + 1...cex_return_index])) # steep:ignore
+          end
+        end
+        # Returns a copy of `word` in which the body of every call with a return index
+        # (as reported by `find_return_index`) is replaced by the recorded terminating
+        # sequence of that call, followed by the return symbol.
+        #
+        # Calls for which no return index is reported are copied as they are and scanning
+        # continues with the next symbol.
+        #
+        #: (Array[In | Call | Return] word) -> Array[In | Call | Return]
+        def minify_well_matched(word)
+          minified_word = []
+          index = 0
+          while index < word.size
+            input = word[index]
+            minified_word << input
+            if @call_alphabet_set.include?(input) # steep:ignore
+              return_index = find_return_index(word, index + 1)
+              if return_index
+                minified_word.concat(@proc_to_terminating_sequence[input]) # steep:ignore
+                minified_word << @return_input
+                # Jump to the return; the increment below moves past it.
+                index = return_index
+              end
+            end
+            index += 1
+          end
+          minified_word
+        end
+        #: (Hash[Call, Array[In | Call | Return]]) -> void
+        def optimize_sequences(proc_to_sequence)
+          proc_to_sequence.each do |proc, sequence|
+            minified_sequence = minify_well_matched(sequence)
+            proc_to_sequence[proc] = minified_sequence if minified_sequence.size < sequence.size
+          end
+        end
+      end
+    end
+  end
+end

data/lib/lernen/algorithm/procedural/procedural_learner.rb ADDED Viewed

@@ -0,0 +1,223 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module Lernen
+  module Algorithm
+    module Procedural
+      # ProceduralLearner is an implementation of the learning algorithm for SPA.
+      #
+      # This algorithm is described in [Frohme & Steffen (2021) "Compositional
+      # Learning of Mutually Recursive Procedural Systems"](https://link.springer.com/article/10.1007/s10009-021-00634-y).
+      #
+      # @rbs generic In     -- Type for input alphabet
+      # @rbs generic Call   -- Type for call alphabet
+      # @rbs generic Return -- Type for return alphabet
+      class ProceduralLearner < Learner #[In | Call | Return, bool]
+        # @rbs @alphabet: Array[In]
+        # @rbs @call_alphabet: Array[Call]
+        # @rbs @return_input: Return
+        # @rbs @sul: System::SUL[In | Call | Return, bool]
+        # @rbs @algorithm: :lstar | :kearns_vazirani | :lsharp
+        # @rbs @algorithm_params: Hash[Symbol, untyped]
+        # @rbs @cex_processing: cex_processing_method
+        # @rbs @initial_proc: Call | nil
+        # @rbs @proc_to_learner: Hash[Call, Learner[In | Call, bool]]
+        # @rbs @manager: ATRManager[In, Call, Return]
+        # @rbs @active_call_alphabet_set: Set[Call]
+        # Creates a learner with no known procedures.
+        #
+        # Copies of `alphabet` and `call_alphabet` are stored. `algorithm` selects the
+        # learner class created per procedure and `algorithm_params` is passed to that
+        # learner's constructor as keyword arguments. `cex_processing` is used when a global
+        # counterexample is analysed, and `scan_procs` is handed to the `ATRManager`.
+        #
+        #: (
+        #    Array[In] alphabet,
+        #    Array[Call] call_alphabet,
+        #    Return return_input,
+        #    System::SUL[In | Call | Return, bool] sul,
+        #    ?algorithm: :lstar | :kearns_vazirani | :lsharp,
+        #    ?algorithm_params: Hash[Symbol, untyped],
+        #    ?cex_processing: cex_processing_method,
+        #    ?scan_procs: bool
+        #  ) -> void
+        def initialize(
+          alphabet,
+          call_alphabet,
+          return_input,
+          sul,
+          algorithm: :kearns_vazirani,
+          algorithm_params: {},
+          cex_processing: :binary,
+          scan_procs: true
+        )
+          super()
+          @alphabet = alphabet.dup
+          @call_alphabet = call_alphabet.dup
+          @return_input = return_input
+          @sul = sul
+          @algorithm = algorithm
+          @algorithm_params = algorithm_params
+          @cex_processing = cex_processing
+          # The initial procedure is only learned from the first accepted counterexample.
+          @initial_proc = nil
+          @proc_to_learner = {}
+          @manager = ATRManager.new(alphabet, call_alphabet, return_input, scan_procs:)
+          @active_call_alphabet_set = Set.new
+        end
+        #: () -> [Automaton::SPA[In, Call, Return], Hash[Call, Hash[Integer, Array[In | Call]]]]
+        def build_hypothesis
+          initial_proc = @initial_proc
+          return build_first_hypothesis, {} unless initial_proc
+          proc_to_dfa = {}
+          proc_to_state_to_prefix = {}
+          @proc_to_learner.each do |proc, learner|
+            dfa, state_to_prefix = learner.build_hypothesis
+            proc_to_dfa[proc] = dfa
+            proc_to_state_to_prefix[proc] = state_to_prefix
+          end
+          hypothesis = Automaton::SPA.new(initial_proc, @return_input, proc_to_dfa)
+          [hypothesis, proc_to_state_to_prefix]
+        end
+        # Refines the learner state with the counterexample `cex`.
+        #
+        # First the counterexample is mined for new procedures and sequences, then
+        # `refine_hypothesis_internal` is called repeatedly until it returns false.
+        # The hypothesis and mapping arguments are not used; fresh ones are built internally.
+        #
+        #: (
+        #    Array[In | Call | Return] cex,
+        #    Automaton::SPA[In, Call, Return] _hypothesis,
+        #    Hash[Call, Hash[Integer, Array[In | Call]]] _proc_to_state_to_prefix
+        #  ) -> void
+        def refine_hypothesis(cex, _hypothesis, _proc_to_state_to_prefix)
+          extract_useful_information_from_cex(cex)
+          loop { break unless refine_hypothesis_internal(cex) }
+        end
+        private
+        # Builds the hypothesis used before any procedure is known: an SPA with no
+        # initial procedure (`nil`) and an empty procedure table.
+        #
+        #: () -> Automaton::SPA[In, Call, Return]
+        def build_first_hypothesis # steep:ignore
+          Automaton::SPA.new(nil, @return_input, {})
+        end
+        # Mines a counterexample for new procedures and sequences.
+        #
+        # Nothing happens unless the SUL's answer for `cex` is truthy. Otherwise the first
+        # symbol of `cex` becomes the initial procedure and the manager scans `cex`. For each
+        # procedure reported as new, a learner of the configured algorithm is created on top
+        # of a `ProceduralSUL`, and the call alphabets of all learners are extended so that
+        # every learner knows every procedure. Finally a hypothesis is built and handed to
+        # the manager's `scan_procs`.
+        #
+        #: (Array[In | Call | Return] cex) -> void
+        def extract_useful_information_from_cex(cex)
+          return unless @sul.query_last(cex)
+          @initial_proc = cex[0]
+          new_procs = @manager.scan_positive_cex(cex)
+          return if new_procs.empty?
+          new_procs.each do |new_proc|
+            proc_sul = ProceduralSUL.new(new_proc, @sul, @manager)
+            new_learner =
+              case @algorithm
+              in :lstar
+                LStar::LStarLearner.new(@alphabet, proc_sul, automaton_type: :dfa, **@algorithm_params)
+              in :kearns_vazirani
+                KearnsVazirani::KearnsVaziraniLearner.new(
+                  @alphabet,
+                  proc_sul,
+                  automaton_type: :dfa,
+                  **@algorithm_params
+                )
+              in :lsharp
+                LSharp::LSharpLearner.new(@alphabet, proc_sul, automaton_type: :dfa, **@algorithm_params)
+              end
+            # The new learner gets every previously known procedure as an input symbol...
+            @proc_to_learner.each_key { |proc| new_learner.add_alphabet(proc) }
+            @proc_to_learner[new_proc] = new_learner
+            # ...and every learner, the new one included, gets the new procedure.
+            @proc_to_learner.each_value { |learner| learner.add_alphabet(new_proc) }
+            @active_call_alphabet_set << new_proc
+          end
+          hypothesis, proc_to_state_to_prefix = build_hypothesis
+          @manager.scan_procs(hypothesis.proc_to_dfa, proc_to_state_to_prefix)
+        end
+        # Performs one refinement step for `cex` and reports whether a refinement happened.
+        #
+        # Returns false as soon as the hypothesis agrees with the SUL on `cex`, either
+        # right away or after `update_atr_and_check_ts_conformance`. Otherwise the
+        # counterexample is analysed over its return positions to pick one call/return
+        # pair, the word between them is projected, and the learner of the called
+        # procedure is refined with that local word. Returns true in that case.
+        #
+        #: (Array[In | Call | Return] cex) -> bool
+        def refine_hypothesis_internal(cex)
+          sul_out = @sul.query_last(cex)
+          hypothesis = build_hypothesis[0]
+          return false if hypothesis.run(cex)[0].last == sul_out
+          update_atr_and_check_ts_conformance
+          # The step above may have refined learners, so rebuild before checking again.
+          hypothesis, proc_to_state_to_prefix = build_hypothesis
+          return false if hypothesis.run(cex)[0].last == sul_out
+          return_indices = (0...cex.size).filter { |index| cex[index] == @return_input } # steep:ignore
+          # Words are answered by the hypothesis when the SUL's answer for `cex` is truthy,
+          # and by the SUL otherwise.
+          global_query =
+            if sul_out
+              ->(word) { hypothesis.run(word)[0].last }
+            else
+              ->(word) { @sul.query_last(word) }
+            end
+          global_acex = ReturnIndicesAcex.new(cex, return_indices, global_query, @manager) # steep:ignore
+          idx = CexProcessor.process(global_acex, cex_processing: @cex_processing)
+          return_index = return_indices[idx]
+          call_index = @manager.find_call_index(cex, return_index)
+          proc = cex[call_index]
+          local_cex = @manager.project(cex[call_index + 1...return_index]) # steep:ignore
+          dfa = hypothesis.proc_to_dfa[proc] # steep:ignore
+          state_to_prefix = proc_to_state_to_prefix[proc] # steep:ignore
+          @proc_to_learner[proc].refine_hypothesis(local_cex, dfa, state_to_prefix) # steep:ignore
+          true
+        end
+        # Repeats the terminating-sequence conformance check until it no longer causes a
+        # refinement. After each refinement the hypothesis is rebuilt and passed to the
+        # manager's `scan_procs`. Returns whether any refinement happened.
+        #
+        #: () -> bool
+        def update_atr_and_check_ts_conformance
+          updated = false
+          hypothesis, proc_to_state_to_prefix = build_hypothesis
+          while check_and_ensure_ts_conformance(hypothesis, proc_to_state_to_prefix)
+            updated = true
+            hypothesis, proc_to_state_to_prefix = build_hypothesis
+            @manager.scan_procs(hypothesis.proc_to_dfa, proc_to_state_to_prefix)
+          end
+          updated
+        end
+        def check_and_ensure_ts_conformance(hypothesis, proc_to_state_to_prefix)
+          updated = false
+          hypothesis.proc_to_dfa.each_key do |proc|
+            ts = []
+            ts << proc
+            ts.concat(@manager.proc_to_terminating_sequence[proc])
+            ts << @return_input
+            if check_and_ensure_single_ts_conformance(ts, hypothesis, proc_to_state_to_prefix)
+              updated = true
+              break
+            end
+          end
+          updated
+        end
+        # Checks every active call symbol inside the word `ts` against the hypothesis.
+        #
+        # For each such call, the part of `ts` up to the index reported by the manager's
+        # `find_return_index` is projected to a local word. If the procedure's DFA does not
+        # give a truthy output for the state reached on that word, the procedure's learner
+        # is refined with it. Returns whether any learner was refined.
+        def check_and_ensure_single_ts_conformance(ts, hypothesis, proc_to_state_to_prefix) # rubocop:disable Naming/MethodParameterName
+          updated = false
+          ts.each_with_index do |input, index|
+            next unless @active_call_alphabet_set.include?(input)
+            return_index = @manager.find_return_index(ts, index + 1)
+            local_word = @manager.project(ts[index + 1...return_index])
+            dfa = hypothesis.proc_to_dfa[input]
+            # The hypothesis already handles this local word as expected; nothing to refine.
+            next if dfa.output(dfa.run(local_word)[1])
+            state_to_prefix = proc_to_state_to_prefix[input]
+            @proc_to_learner[input].refine_hypothesis(local_word, dfa, state_to_prefix)
+            updated = true
+          end
+          updated
+        end
+      end
+    end
+  end
+end