RubyGems - kleene - Versions diffs - 0.4.0 - Mend

kleene 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/Gemfile +12 -0
data/Gemfile.lock +117 -0
data/LICENSE +21 -0
data/README.md +21 -0
data/Rakefile +8 -0
data/build.ops +63 -0
data/kleene.gemspec +39 -0
data/lib/kleene/dfa.rb +258 -0
data/lib/kleene/dsl.rb +263 -0
data/lib/kleene/kleene.rb +88 -0
data/lib/kleene/multi_match_dfa.rb +308 -0
data/lib/kleene/nfa.rb +304 -0
data/lib/kleene/patches.rb +23 -0
data/lib/kleene/version.rb +3 -0
data/lib/kleene.rb +17 -0
metadata +76 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 0c062445eeb37aa9a123c09e8d682b3c6514be3842a4f63b821ab320daee958b
+  data.tar.gz: 902a5fcc8d767bbb0c3b97e931dba5b72e07f9ef0ec8540c2420675ecd7bc0f7
+SHA512:
+  metadata.gz: 54b677033bbae4ced31b75bf3b3130a123017982fb9959a957360a9d8a10a4b50d03533bf27e3fbfafc7052574388e2adba0eeae63beee9dc5c7c6cabed429c0
+  data.tar.gz: 9d0b6d4715254e3f544e4f2ed220103f9d092063ab58a7781688ce035810662f6a441a899660ff9496776a5052548251174aeab3107515530dbe106486d27175

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--format documentation
+--color
+--require spec_helper

data/Gemfile ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+source "https://rubygems.org"
+# Specify your gem's dependencies in kleene.gemspec
+gemspec
+gem "rake", "~> 13.0"
+gem "rspec", "~> 3.0"
+gem 'solargraph', group: :development

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,117 @@
+PATH
+  remote: .
+  specs:
+    kleene (0.1.0)
+      activesupport (~> 7.1)
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activesupport (7.1.1)
+      base64
+      bigdecimal
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      connection_pool (>= 2.2.5)
+      drb
+      i18n (>= 1.6, < 2)
+      minitest (>= 5.1)
+      mutex_m
+      tzinfo (~> 2.0)
+    ast (2.4.2)
+    backport (1.2.0)
+    base64 (0.1.1)
+    benchmark (0.2.1)
+    bigdecimal (3.1.4)
+    concurrent-ruby (1.2.2)
+    connection_pool (2.4.1)
+    diff-lcs (1.5.0)
+    drb (2.1.1)
+      ruby2_keywords
+    e2mmap (0.1.0)
+    i18n (1.14.1)
+      concurrent-ruby (~> 1.0)
+    jaro_winkler (1.5.6)
+    json (2.6.3)
+    kramdown (2.4.0)
+      rexml
+    kramdown-parser-gfm (1.1.0)
+      kramdown (~> 2.0)
+    language_server-protocol (3.17.0.3)
+    minitest (5.20.0)
+    mutex_m (0.1.2)
+    nokogiri (1.15.4-x86_64-linux)
+      racc (~> 1.4)
+    parallel (1.23.0)
+    parser (3.2.2.4)
+      ast (~> 2.4.1)
+      racc
+    racc (1.7.2)
+    rainbow (3.1.1)
+    rake (13.1.0)
+    rbs (2.8.4)
+    regexp_parser (2.8.2)
+    reverse_markdown (2.1.1)
+      nokogiri
+    rexml (3.2.6)
+    rspec (3.12.0)
+      rspec-core (~> 3.12.0)
+      rspec-expectations (~> 3.12.0)
+      rspec-mocks (~> 3.12.0)
+    rspec-core (3.12.2)
+      rspec-support (~> 3.12.0)
+    rspec-expectations (3.12.3)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.12.0)
+    rspec-mocks (3.12.6)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.12.0)
+    rspec-support (3.12.1)
+    rubocop (1.57.2)
+      json (~> 2.3)
+      language_server-protocol (>= 3.17.0)
+      parallel (~> 1.10)
+      parser (>= 3.2.2.4)
+      rainbow (>= 2.2.2, < 4.0)
+      regexp_parser (>= 1.8, < 3.0)
+      rexml (>= 3.2.5, < 4.0)
+      rubocop-ast (>= 1.28.1, < 2.0)
+      ruby-progressbar (~> 1.7)
+      unicode-display_width (>= 2.4.0, < 3.0)
+    rubocop-ast (1.30.0)
+      parser (>= 3.2.1.0)
+    ruby-progressbar (1.13.0)
+    ruby2_keywords (0.0.5)
+    solargraph (0.49.0)
+      backport (~> 1.2)
+      benchmark
+      bundler (~> 2.0)
+      diff-lcs (~> 1.4)
+      e2mmap
+      jaro_winkler (~> 1.5)
+      kramdown (~> 2.3)
+      kramdown-parser-gfm (~> 1.1)
+      parser (~> 3.0)
+      rbs (~> 2.0)
+      reverse_markdown (~> 2.0)
+      rubocop (~> 1.38)
+      thor (~> 1.0)
+      tilt (~> 2.0)
+      yard (~> 0.9, >= 0.9.24)
+    thor (1.3.0)
+    tilt (2.3.0)
+    tzinfo (2.0.6)
+      concurrent-ruby (~> 1.0)
+    unicode-display_width (2.5.0)
+    yard (0.9.34)
+PLATFORMS
+  x86_64-linux
+DEPENDENCIES
+  kleene!
+  rake (~> 13.0)
+  rspec (~> 3.0)
+  solargraph
+BUNDLED WITH
+   2.4.10

data/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2023 David Ellis
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,21 @@
+# kleene
+kleene is a library for building regular expression recognition automata - nfas, dfas, and some specialty structures.
+## Installation
+Install the gem and add to the application's Gemfile by executing:
+    $ bundle add kleene
+If bundler is not being used to manage dependencies, install the gem by executing:
+    $ gem install kleene
+## Usage
+```ruby
+require "kleene"
+```

data/Rakefile ADDED Viewed

@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+RSpec::Core::RakeTask.new(:spec)
+task default: :spec

data/build.ops ADDED Viewed

@@ -0,0 +1,63 @@
+params:
+  version: string
+imports:
+  core: "opswalrus/core"
+...
+# when you run this script, it should do something like:
+# ~/sync/projects/kleene-rb
+# ❯ ops run build.ops version:1.0.0
+# Write version.rb for version 1.0.0
+# [localhost] Build gem: gem build opswalrus.gemspec
+# [localhost] Check whether Bitwarden is locked or not: bw status
+# [localhost] Get Rubygems OTP: bw get totp Rubygems
+# [localhost] Push gem: gem push opswalrus-1.0.0.gem
+# [localhost] Build docker image: docker build -t opswalrus/ops:1.0.0 .
+# ~/sync/projects/ops/opswalrus on  main via 💎 v3.2.2 took 44s
+version = params.version
+exit 1, "version parameter must be specified" unless version
+template = <<TEMPLATE
+module Kleene
+  VERSION = "{{ version }}"
+end
+TEMPLATE
+puts "Write version.rb for version #{version}"
+core.template.write template: template,
+                    variables: {version: version},
+                    to: "./lib/kleene/version.rb"
+sh("Build gem") { 'gem build kleene.gemspec' }
+sh("Commit Gemfile.lock and version.rb and git push changes") { 'git commit -am "gem {{ version }}" && git push' }
+# bw_status_output = sh("Check whether Bitwarden is locked or not") { 'bw status' }
+is_unlocked = sh? "Check whether Bitwarden is locked or not",
+                  'rbw unlocked'
+# the `bw status` command currently exhibits an error in which it emits 'mac failed.' some number of times, so we need to filter that out
+# see:
+# - https://community.bitwarden.com/t/what-does-mac-failed-mean-exactly/29208
+# - https://github.com/bitwarden/cli/issues/88
+# - https://github.com/vwxyzjn/portwarden/issues/22
+# ❯ bw status
+# mac failed.
+# {"serverUrl":"...","lastSync":"2023-08-17T19:14:09.384Z","userEmail":"...","userId":"...","status":"locked"}
+# bw_status_output = bw_status_output.gsub('mac failed.', '').strip
+# bw_status_json = bw_status_output.parse_json
+# if bw_status_json['status'] != 'unlocked'
+#   exit 1, "Bitwarden is not unlocked. Please unlock bitwarden with: bw unlock"
+# end
+exit 1, "Bitwarden is not unlocked. Please unlock bitwarden with: rbw unlock" unless is_unlocked
+# totp = sh("Get Rubygems OTP") { 'bw get totp Rubygems' }
+totp = sh "Get Rubygems OTP",
+          'rbw get -f totp Rubygems'
+sh("Push gem", input: {/You have enabled multi-factor authentication. Please enter OTP code./ => "#{totp}\n"}) { 'gem push kleene-{{ version }}.gem' }

data/kleene.gemspec ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+require_relative "lib/kleene/version"
+Gem::Specification.new do |spec|
+  spec.name                  = "kleene"
+  spec.version               = Kleene::VERSION
+  spec.authors               = ["David Ellis"]
+  spec.email                 = ["david@conquerthelawn.com"]
+  spec.summary               = "kleene is a library for building regular expression recognition automata"
+  spec.description           = "kleene is a library for building regular expression recognition automata - nfas, dfas, and some specialty structures."
+  spec.homepage              = "https://github.com/davidkellis/kleene-rb"
+  spec.license               = "MIT"
+  spec.required_ruby_version = ">= 3.0.0"
+  # spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = "https://github.com/davidkellis/kleene-rb"
+  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+  spec.files = Dir.chdir(__dir__) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (File.expand_path(f) == __FILE__) || f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor])
+    end
+  end
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  # Runtime dependencies of the gem
+  spec.add_dependency "activesupport", "~> 7.1"
+  # For more information and examples about making a new gem, check out our
+  # guide at: https://bundler.io/guides/creating_gem.html
+end

data/lib/kleene/dfa.rb ADDED Viewed

@@ -0,0 +1,258 @@
+module Kleene
+  class DFATransition
+    attr_accessor :token # : Char
+    attr_accessor :from # : State
+    attr_accessor :to # : State
+    def initialize(token, from_state, to_state)
+      @token = token
+      @from = from_state
+      @to = to_state
+    end
+    def accept?(input)
+      @token == input
+    end
+  end
+  # ->(transition : DFATransition, token : Char, token_index : Int32) : Nil { ... }
+  # alias DFATransitionCallback = Proc(DFATransition, Char, Int32, Nil)
+  class DFA
+    attr_accessor :alphabet # : Set(Char)
+    attr_accessor :states # : Set(State)
+    attr_accessor :start_state # : State
+    attr_accessor :current_state # : State
+    attr_accessor :transitions # : Hash(State, Hash(Char, DFATransition))
+    attr_accessor :final_states # : Set(State)
+    attr_accessor :dfa_state_to_nfa_state_sets # : Hash(State, Set(State))            # this map contains (dfa_state => nfa_state_set) pairs
+    attr_accessor :nfa_state_to_dfa_state_sets # : Hash(State, Set(State))            # this map contains (nfa_state => dfa_state_set) pairs
+    attr_accessor :transition_callbacks # : Hash(DFATransition, DFATransitionCallback)
+    attr_accessor :transition_callbacks_per_destination_state # : Hash(State, DFATransitionCallback)
+    # @origin_nfa : NFA?
+    # @error_states : Set(State)?
+    # @regex_pattern : String?
+    def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil)
+      @start_state = start_state
+      @current_state = start_state
+      @transitions = transitions
+      @dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets
+      @alphabet = alphabet + all_transitions.map(&:token)
+      @states = reachable_states(@start_state)
+      @final_states = Set.new
+      @nfa_state_to_dfa_state_sets = Hash.new
+      @dfa_state_to_nfa_state_sets.each do |dfa_state, nfa_state_set|
+        nfa_state_set.each do |nfa_state|
+          dfa_state_set = @nfa_state_to_dfa_state_sets[nfa_state] ||= Set.new
+          dfa_state_set << dfa_state
+        end
+      end
+      @transition_callbacks = transition_callbacks || Hash.new
+      @transition_callbacks_per_destination_state = Hash.new
+      @origin_nfa = origin_nfa
+      update_final_states
+      reset_current_state
+    end
+    def origin_nfa
+      @origin_nfa || raise("This DFA was not created from an NFA, therefore it has no origin_nfa.")
+    end
+    def error_states
+      @error_states ||= @states.select {|s| s.error? }.to_set
+    end
+    def clear_error_states
+      @error_states = nil
+    end
+    def all_transitions() # : Array(DFATransition)
+      transitions.flat_map {|state, char_transition_map| char_transition_map.values }
+    end
+    def on_transition(transition, &blk)
+      @transition_callbacks[transition] = blk
+    end
+    def on_transition_to(state, &blk)
+      @transition_callbacks_per_destination_state[state] = blk
+    end
+    def shallow_clone
+      DFA.new(start_state, alphabet, transitions, dfa_state_to_nfa_state_sets, transition_callbacks, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
+    end
+    # transition callbacks are not copied beacuse it is assumed that the state transition callbacks may be stateful and reference structures or states that only exist in `self`, but not the cloned copy.
+    def deep_clone
+      old_states = @states.to_a
+      new_states = old_states.map(&:dup)
+      state_mapping = old_states.zip(new_states).to_h
+      transition_mapping = Hash.new
+      new_transitions = transitions.map do |state, char_transition_map|
+        [
+          state_mapping[state],
+          char_transition_map.map do |char, old_transition|
+            new_transition = DFATransition.new(old_transition.token, state_mapping[old_transition.from], state_mapping[old_transition.to])
+            transition_mapping[old_transition] = new_transition
+            [char, new_transition]
+          end.to_h
+        ]
+      end.to_h
+      # new_transition_callbacks = transition_callbacks.map do |transition, callback|
+      #   {
+      #     transition_mapping[transition],
+      #     callback
+      #   }
+      # end.to_h
+      new_dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets.map {|dfa_state, nfa_state_set| [state_mapping[dfa_state], nfa_state_set] }.to_h
+      DFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_dfa_state_to_nfa_state_sets, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
+    end
+    def update_final_states
+      @final_states = @states.select {|s| s.final? }.to_set
+    end
+    def reset_current_state
+      @current_state = @start_state
+    end
+    def add_transition(token, from_state, to_state)
+      @alphabet << token      # alphabet is a set, so there will be no duplications
+      @states << to_state     # states is a set, so there will be no duplications (to_state should be the only new state)
+      new_transition = DFATransition.new(token, from_state, to_state)
+      @transitions[from_state][token] = new_transition
+      new_transition
+    end
+    def match?(input)
+      reset_current_state
+      input.each_char.with_index do |char, index|
+        handle_token!(char, index)
+      end
+      if accept?
+        MatchRef.new(input, 0...input.size)
+      end
+    end
+    # Returns an array of matches found in the input string, each of which begins at the offset input_start_offset
+    def matches_at_offset(input, input_start_offset)
+      reset_current_state
+      matches = []
+      (input_start_offset...input.size).each do |offset|
+        token = input[offset]
+        handle_token!(token, offset)
+        if accept?
+          matches << MatchRef.new(input, input_start_offset..offset)
+        end
+      end
+      matches
+    end
+    # Returns an array of matches found anywhere in the input string
+    def matches(input)
+      (0...input.size).reduce([]) do |memo, offset|
+        memo + matches_at_offset(input, offset)
+      end
+    end
+    # accept an input token and transition to the next state in the state machine
+    def handle_token!(input_token, token_index)
+      @current_state = next_state(@current_state, input_token, token_index)
+    end
+    def accept?
+      @current_state.final?
+    end
+    def error?
+      @current_state.error?
+    end
+    # def terminal?
+    #   accept? || error?
+    # end
+    # if the DFA is currently in a final state, then we look up the associated NFA states that were also final, and return them
+    # def accepting_nfa_states : Set(State)
+    #   if accept?
+    #     dfa_state_to_nfa_state_sets[@current_state].select(&:final?).to_set
+    #   else
+    #     Set.new
+    #   end
+    # end
+    # this function transitions from state to state on an input token
+    def next_state(from_state, input_token, token_index)
+      transition = @transitions[from_state][input_token] || raise("No DFA transition found. Input token #{input_token} not in DFA alphabet.")
+      # invoke the relevant transition callback function
+      transition_callbacks[transition].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }
+      transition_callbacks_per_destination_state[transition.to].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }
+      transition.to
+    end
+    # Returns a set of State objects which are reachable through any transition path from the DFA's start_state.
+    def reachable_states(start_state)
+      visited_states = Set.new()
+      unvisited_states = Set[start_state]
+      while !unvisited_states.empty?
+        outbound_transitions = unvisited_states.flat_map {|state| @transitions[state].try(&:values) || Array.new }
+        destination_states = outbound_transitions.map(&:to).to_set
+        visited_states.merge(unvisited_states)         # add the unvisited states to the visited_states
+        unvisited_states = destination_states - visited_states
+      end
+      visited_states
+    end
+    # this is currently broken
+    # def to_nfa
+    #   dfa = self.deep_clone
+    #   NFA.new(dfa.start_state, dfa.alphabet.clone, dfa.transitions)
+    #   # todo: add all of this machine's transitions to the new machine
+    #   # @transitions.each {|t| nfa.add_transition(t.token, t.from, t.to) }
+    #   # nfa
+    # end
+    def to_s(verbose = false)
+      if verbose
+        retval = states.map(&:to_s).join("\n")
+        retval += "\n"
+        all_transitions.each do |t|
+          retval += "#{t.from.id} -> #{t.token} -> #{t.to.id}\n"
+        end
+        retval
+      else
+        regex_pattern
+      end
+    end
+    # This is an implementation of the "Reducing a DFA to a Minimal DFA" algorithm presented here: http://web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart4.pdf
+    # This implements Hopcroft's algorithm as presented on page 142 of the first edition of the dragon book.
+    def minimize!
+      # todo: I'll implement this when I need it
+    end
+    def set_regex_pattern(pattern)
+      @regex_pattern = pattern
+      self
+    end
+    def regex_pattern
+      @regex_pattern || "<<empty>>"
+    end
+  end
+end