RubyGems - kleene - Versions diffs - 0.4.0 - Mend

kleene 0.4.0

Files changed (18) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/Gemfile +12 -0
data/Gemfile.lock +117 -0
data/LICENSE +21 -0
data/README.md +21 -0
data/Rakefile +8 -0
data/build.ops +63 -0
data/kleene.gemspec +39 -0
data/lib/kleene/dfa.rb +258 -0
data/lib/kleene/dsl.rb +263 -0
data/lib/kleene/kleene.rb +88 -0
data/lib/kleene/multi_match_dfa.rb +308 -0
data/lib/kleene/nfa.rb +304 -0
data/lib/kleene/patches.rb +23 -0
data/lib/kleene/version.rb +3 -0
data/lib/kleene.rb +17 -0
metadata +76 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 0c062445eeb37aa9a123c09e8d682b3c6514be3842a4f63b821ab320daee958b
+  data.tar.gz: 902a5fcc8d767bbb0c3b97e931dba5b72e07f9ef0ec8540c2420675ecd7bc0f7
+SHA512:
+  metadata.gz: 54b677033bbae4ced31b75bf3b3130a123017982fb9959a957360a9d8a10a4b50d03533bf27e3fbfafc7052574388e2adba0eeae63beee9dc5c7c6cabed429c0
+  data.tar.gz: 9d0b6d4715254e3f544e4f2ed220103f9d092063ab58a7781688ce035810662f6a441a899660ff9496776a5052548251174aeab3107515530dbe106486d27175

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--format documentation
+--color
+--require spec_helper

data/Gemfile ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+source "https://rubygems.org"
+# Specify your gem's dependencies in kleene.gemspec
+gemspec
+gem "rake", "~> 13.0"
+gem "rspec", "~> 3.0"
+gem 'solargraph', group: :development

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,117 @@
+PATH
+  remote: .
+  specs:
+    kleene (0.1.0)
+      activesupport (~> 7.1)
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activesupport (7.1.1)
+      base64
+      bigdecimal
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      connection_pool (>= 2.2.5)
+      drb
+      i18n (>= 1.6, < 2)
+      minitest (>= 5.1)
+      mutex_m
+      tzinfo (~> 2.0)
+    ast (2.4.2)
+    backport (1.2.0)
+    base64 (0.1.1)
+    benchmark (0.2.1)
+    bigdecimal (3.1.4)
+    concurrent-ruby (1.2.2)
+    connection_pool (2.4.1)
+    diff-lcs (1.5.0)
+    drb (2.1.1)
+      ruby2_keywords
+    e2mmap (0.1.0)
+    i18n (1.14.1)
+      concurrent-ruby (~> 1.0)
+    jaro_winkler (1.5.6)
+    json (2.6.3)
+    kramdown (2.4.0)
+      rexml
+    kramdown-parser-gfm (1.1.0)
+      kramdown (~> 2.0)
+    language_server-protocol (3.17.0.3)
+    minitest (5.20.0)
+    mutex_m (0.1.2)
+    nokogiri (1.15.4-x86_64-linux)
+      racc (~> 1.4)
+    parallel (1.23.0)
+    parser (3.2.2.4)
+      ast (~> 2.4.1)
+      racc
+    racc (1.7.2)
+    rainbow (3.1.1)
+    rake (13.1.0)
+    rbs (2.8.4)
+    regexp_parser (2.8.2)
+    reverse_markdown (2.1.1)
+      nokogiri
+    rexml (3.2.6)
+    rspec (3.12.0)
+      rspec-core (~> 3.12.0)
+      rspec-expectations (~> 3.12.0)
+      rspec-mocks (~> 3.12.0)
+    rspec-core (3.12.2)
+      rspec-support (~> 3.12.0)
+    rspec-expectations (3.12.3)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.12.0)
+    rspec-mocks (3.12.6)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.12.0)
+    rspec-support (3.12.1)
+    rubocop (1.57.2)
+      json (~> 2.3)
+      language_server-protocol (>= 3.17.0)
+      parallel (~> 1.10)
+      parser (>= 3.2.2.4)
+      rainbow (>= 2.2.2, < 4.0)
+      regexp_parser (>= 1.8, < 3.0)
+      rexml (>= 3.2.5, < 4.0)
+      rubocop-ast (>= 1.28.1, < 2.0)
+      ruby-progressbar (~> 1.7)
+      unicode-display_width (>= 2.4.0, < 3.0)
+    rubocop-ast (1.30.0)
+      parser (>= 3.2.1.0)
+    ruby-progressbar (1.13.0)
+    ruby2_keywords (0.0.5)
+    solargraph (0.49.0)
+      backport (~> 1.2)
+      benchmark
+      bundler (~> 2.0)
+      diff-lcs (~> 1.4)
+      e2mmap
+      jaro_winkler (~> 1.5)
+      kramdown (~> 2.3)
+      kramdown-parser-gfm (~> 1.1)
+      parser (~> 3.0)
+      rbs (~> 2.0)
+      reverse_markdown (~> 2.0)
+      rubocop (~> 1.38)
+      thor (~> 1.0)
+      tilt (~> 2.0)
+      yard (~> 0.9, >= 0.9.24)
+    thor (1.3.0)
+    tilt (2.3.0)
+    tzinfo (2.0.6)
+      concurrent-ruby (~> 1.0)
+    unicode-display_width (2.5.0)
+    yard (0.9.34)
+PLATFORMS
+  x86_64-linux
+DEPENDENCIES
+  kleene!
+  rake (~> 13.0)
+  rspec (~> 3.0)
+  solargraph
+BUNDLED WITH
+   2.4.10

data/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2023 David Ellis
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,21 @@
+# kleene
+kleene is a library for building regular expression recognition automata - nfas, dfas, and some specialty structures.
+## Installation
+Install the gem and add to the application's Gemfile by executing:
+    $ bundle add kleene
+If bundler is not being used to manage dependencies, install the gem by executing:
+    $ gem install kleene
+## Usage
+```ruby
+require "kleene"
+```

data/Rakefile ADDED Viewed

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+RSpec::Core::RakeTask.new(:spec)
+task default: :spec

data/build.ops ADDED Viewed

@@ -0,0 +1,63 @@
+params:
+  version: string
+imports:
+  core: "opswalrus/core"
+...
+# when you run this script, it should do something like:
+# ~/sync/projects/kleene-rb
+# ❯ ops run build.ops version:1.0.0
+# Write version.rb for version 1.0.0
+# [localhost] Build gem: gem build opswalrus.gemspec
+# [localhost] Check whether Bitwarden is locked or not: bw status
+# [localhost] Get Rubygems OTP: bw get totp Rubygems
+# [localhost] Push gem: gem push opswalrus-1.0.0.gem
+# [localhost] Build docker image: docker build -t opswalrus/ops:1.0.0 .
+# ~/sync/projects/ops/opswalrus on  main via 💎 v3.2.2 took 44s
+version = params.version
+exit 1, "version parameter must be specified" unless version
+template = <<TEMPLATE
+module Kleene
+  VERSION = "{{ version }}"
+end
+TEMPLATE
+puts "Write version.rb for version #{version}"
+core.template.write template: template,
+                    variables: {version: version},
+                    to: "./lib/kleene/version.rb"
+sh("Build gem") { 'gem build kleene.gemspec' }
+sh("Commit Gemfile.lock and version.rb and git push changes") { 'git commit -am "gem {{ version }}" && git push' }
+# bw_status_output = sh("Check whether Bitwarden is locked or not") { 'bw status' }
+is_unlocked = sh? "Check whether Bitwarden is locked or not",
+                  'rbw unlocked'
+# the `bw status` command currently exhibits an error in which it emits 'mac failed.' some number of times, so we need to filter that out
+# see:
+# - https://community.bitwarden.com/t/what-does-mac-failed-mean-exactly/29208
+# - https://github.com/bitwarden/cli/issues/88
+# - https://github.com/vwxyzjn/portwarden/issues/22
+# ❯ bw status
+# mac failed.
+# {"serverUrl":"...","lastSync":"2023-08-17T19:14:09.384Z","userEmail":"...","userId":"...","status":"locked"}
+# bw_status_output = bw_status_output.gsub('mac failed.', '').strip
+# bw_status_json = bw_status_output.parse_json
+# if bw_status_json['status'] != 'unlocked'
+#   exit 1, "Bitwarden is not unlocked. Please unlock bitwarden with: bw unlock"
+# end
+exit 1, "Bitwarden is not unlocked. Please unlock bitwarden with: rbw unlock" unless is_unlocked
+# totp = sh("Get Rubygems OTP") { 'bw get totp Rubygems' }
+totp = sh "Get Rubygems OTP",
+          'rbw get -f totp Rubygems'
+sh("Push gem", input: {/You have enabled multi-factor authentication. Please enter OTP code./ => "#{totp}\n"}) { 'gem push kleene-{{ version }}.gem' }

data/kleene.gemspec ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+require_relative "lib/kleene/version"
+Gem::Specification.new do |spec|
+  spec.name                  = "kleene"
+  spec.version               = Kleene::VERSION
+  spec.authors               = ["David Ellis"]
+  spec.email                 = ["david@conquerthelawn.com"]
+  spec.summary               = "kleene is a library for building regular expression recognition automata"
+  spec.description           = "kleene is a library for building regular expression recognition automata - nfas, dfas, and some specialty structures."
+  spec.homepage              = "https://github.com/davidkellis/kleene-rb"
+  spec.license               = "MIT"
+  spec.required_ruby_version = ">= 3.0.0"
+  # spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = "https://github.com/davidkellis/kleene-rb"
+  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+  spec.files = Dir.chdir(__dir__) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (File.expand_path(f) == __FILE__) || f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor])
+    end
+  end
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  # Runtime dependencies of the gem
+  spec.add_dependency "activesupport", "~> 7.1"
+  # For more information and examples about making a new gem, check out our
+  # guide at: https://bundler.io/guides/creating_gem.html
+end

data/lib/kleene/dfa.rb ADDED Viewed

@@ -0,0 +1,258 @@
+module Kleene
+  class DFATransition
+    attr_accessor :token # : Char
+    attr_accessor :from # : State
+    attr_accessor :to # : State
+    def initialize(token, from_state, to_state)
+      @token = token
+      @from = from_state
+      @to = to_state
+    end
+    def accept?(input)
+      @token == input
+    end
+  end
+  # ->(transition : DFATransition, token : Char, token_index : Int32) : Nil { ... }
+  # alias DFATransitionCallback = Proc(DFATransition, Char, Int32, Nil)
+  class DFA
+    attr_accessor :alphabet # : Set(Char)
+    attr_accessor :states # : Set(State)
+    attr_accessor :start_state # : State
+    attr_accessor :current_state # : State
+    attr_accessor :transitions # : Hash(State, Hash(Char, DFATransition))
+    attr_accessor :final_states # : Set(State)
+    attr_accessor :dfa_state_to_nfa_state_sets # : Hash(State, Set(State))            # this map contains (dfa_state => nfa_state_set) pairs
+    attr_accessor :nfa_state_to_dfa_state_sets # : Hash(State, Set(State))            # this map contains (nfa_state => dfa_state_set) pairs
+    attr_accessor :transition_callbacks # : Hash(DFATransition, DFATransitionCallback)
+    attr_accessor :transition_callbacks_per_destination_state # : Hash(State, DFATransitionCallback)
+    # @origin_nfa : NFA?
+    # @error_states : Set(State)?
+    # @regex_pattern : String?
+    def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil)
+      @start_state = start_state
+      @current_state = start_state
+      @transitions = transitions
+      @dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets
+      @alphabet = alphabet + all_transitions.map(&:token)
+      @states = reachable_states(@start_state)
+      @final_states = Set.new
+      @nfa_state_to_dfa_state_sets = Hash.new
+      @dfa_state_to_nfa_state_sets.each do |dfa_state, nfa_state_set|
+        nfa_state_set.each do |nfa_state|
+          dfa_state_set = @nfa_state_to_dfa_state_sets[nfa_state] ||= Set.new
+          dfa_state_set << dfa_state
+        end
+      end
+      @transition_callbacks = transition_callbacks || Hash.new
+      @transition_callbacks_per_destination_state = Hash.new
+      @origin_nfa = origin_nfa
+      update_final_states
+      reset_current_state
+    end
+    def origin_nfa
+      @origin_nfa || raise("This DFA was not created from an NFA, therefore it has no origin_nfa.")
+    end
+    def error_states
+      @error_states ||= @states.select {|s| s.error? }.to_set
+    end
+    def clear_error_states
+      @error_states = nil
+    end
+    def all_transitions() # : Array(DFATransition)
+      transitions.flat_map {|state, char_transition_map| char_transition_map.values }
+    end
+    def on_transition(transition, &blk)
+      @transition_callbacks[transition] = blk
+    end
+    def on_transition_to(state, &blk)
+      @transition_callbacks_per_destination_state[state] = blk
+    end
+    def shallow_clone
+      DFA.new(start_state, alphabet, transitions, dfa_state_to_nfa_state_sets, transition_callbacks, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
+    end
+    # transition callbacks are not copied beacuse it is assumed that the state transition callbacks may be stateful and reference structures or states that only exist in `self`, but not the cloned copy.
+    def deep_clone
+      old_states = @states.to_a
+      new_states = old_states.map(&:dup)
+      state_mapping = old_states.zip(new_states).to_h
+      transition_mapping = Hash.new
+      new_transitions = transitions.map do |state, char_transition_map|
+        [
+          state_mapping[state],
+          char_transition_map.map do |char, old_transition|
+            new_transition = DFATransition.new(old_transition.token, state_mapping[old_transition.from], state_mapping[old_transition.to])
+            transition_mapping[old_transition] = new_transition
+            [char, new_transition]
+          end.to_h
+        ]
+      end.to_h
+      # new_transition_callbacks = transition_callbacks.map do |transition, callback|
+      #   {
+      #     transition_mapping[transition],
+      #     callback
+      #   }
+      # end.to_h
+      new_dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets.map {|dfa_state, nfa_state_set| [state_mapping[dfa_state], nfa_state_set] }.to_h
+      DFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_dfa_state_to_nfa_state_sets, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
+    end
+    def update_final_states
+      @final_states = @states.select {|s| s.final? }.to_set
+    end
+    def reset_current_state
+      @current_state = @start_state
+    end
+    def add_transition(token, from_state, to_state)
+      @alphabet << token      # alphabet is a set, so there will be no duplications
+      @states << to_state     # states is a set, so there will be no duplications (to_state should be the only new state)
+      new_transition = DFATransition.new(token, from_state, to_state)
+      @transitions[from_state][token] = new_transition
+      new_transition
+    end
+    def match?(input)
+      reset_current_state
+      input.each_char.with_index do |char, index|
+        handle_token!(char, index)
+      end
+      if accept?
+        MatchRef.new(input, 0...input.size)
+      end
+    end
+    # Returns an array of matches found in the input string, each of which begins at the offset input_start_offset
+    def matches_at_offset(input, input_start_offset)
+      reset_current_state
+      matches = []
+      (input_start_offset...input.size).each do |offset|
+        token = input[offset]
+        handle_token!(token, offset)
+        if accept?
+          matches << MatchRef.new(input, input_start_offset..offset)
+        end
+      end
+      matches
+    end
+    # Returns an array of matches found anywhere in the input string
+    def matches(input)
+      (0...input.size).reduce([]) do |memo, offset|
+        memo + matches_at_offset(input, offset)
+      end
+    end
+    # accept an input token and transition to the next state in the state machine
+    def handle_token!(input_token, token_index)
+      @current_state = next_state(@current_state, input_token, token_index)
+    end
+    def accept?
+      @current_state.final?
+    end
+    def error?
+      @current_state.error?
+    end
+    # def terminal?
+    #   accept? || error?
+    # end
+    # if the DFA is currently in a final state, then we look up the associated NFA states that were also final, and return them
+    # def accepting_nfa_states : Set(State)
+    #   if accept?
+    #     dfa_state_to_nfa_state_sets[@current_state].select(&:final?).to_set
+    #   else
+    #     Set.new
+    #   end
+    # end
+    # this function transitions from state to state on an input token
+    def next_state(from_state, input_token, token_index)
+      transition = @transitions[from_state][input_token] || raise("No DFA transition found. Input token #{input_token} not in DFA alphabet.")
+      # invoke the relevant transition callback function
+      transition_callbacks[transition].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }
+      transition_callbacks_per_destination_state[transition.to].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }
+      transition.to
+    end
+    # Returns a set of State objects which are reachable through any transition path from the DFA's start_state.
+    def reachable_states(start_state)
+      visited_states = Set.new()
+      unvisited_states = Set[start_state]
+      while !unvisited_states.empty?
+        outbound_transitions = unvisited_states.flat_map {|state| @transitions[state].try(&:values) || Array.new }
+        destination_states = outbound_transitions.map(&:to).to_set
+        visited_states.merge(unvisited_states)         # add the unvisited states to the visited_states
+        unvisited_states = destination_states - visited_states
+      end
+      visited_states
+    end
+    # this is currently broken
+    # def to_nfa
+    #   dfa = self.deep_clone
+    #   NFA.new(dfa.start_state, dfa.alphabet.clone, dfa.transitions)
+    #   # todo: add all of this machine's transitions to the new machine
+    #   # @transitions.each {|t| nfa.add_transition(t.token, t.from, t.to) }
+    #   # nfa
+    # end
+    def to_s(verbose = false)
+      if verbose
+        retval = states.map(&:to_s).join("\n")
+        retval += "\n"
+        all_transitions.each do |t|
+          retval += "#{t.from.id} -> #{t.token} -> #{t.to.id}\n"
+        end
+        retval
+      else
+        regex_pattern
+      end
+    end
+    # This is an implementation of the "Reducing a DFA to a Minimal DFA" algorithm presented here: http://web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart4.pdf
+    # This implements Hopcroft's algorithm as presented on page 142 of the first edition of the dragon book.
+    def minimize!
+      # todo: I'll implement this when I need it
+    end
+    def set_regex_pattern(pattern)
+      @regex_pattern = pattern
+      self
+    end
+    def regex_pattern
+      @regex_pattern || "<<empty>>"
+    end
+  end
+end