kleene 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7df94943f1746064025af64823d93819d96d04d5f6526789451dcdac643f54df
4
- data.tar.gz: 6590239e59b38ed2155f4329070ee763904dabc7535205bc5f1e23b780be48a0
3
+ metadata.gz: 674bbda22ddfbc6c4ec1624de621b96c24576cbe8aa656d228697e1d98549cdb
4
+ data.tar.gz: ddca6b95201b21359dd23c5d6b4d9591561e9045764ce90b470b6add1c4518b8
5
5
  SHA512:
6
- metadata.gz: 276cec78e2550ff2d7b95f96d3e98c55b6a43ace55d14300ae1406cd292dd5b709eb4074f1c2cac4ac806125a8296172aaa57ee37cece7a4445cb0069629c5ea
7
- data.tar.gz: 961ac38a53dbbf716d4cc34c07f26eb3cd36bb9dd0bcd65485f3c2d9cc0b667eb3d297a8d91e19507e6b7ec59180c92fca4574c10df57a4588e337f9012f8e19
6
+ metadata.gz: 9392d0b56aa48b8cef4f0337be625d6160d49c3fb0214b034f379a505ee3a142347e8bb5e62a82ad0cb982bb3ca00ad6f9b12f951e00cf48b0447a1b6fb78320
7
+ data.tar.gz: 1521365696f470bc249dac8aa77038bc9121ff60499dc9368aaf86224e64af14a06e092aef191bc9571d62d2dd63567a7051f4b07490e4f377eac8d24de83025
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kleene (0.1.0)
4
+ kleene (0.6.0)
5
5
  activesupport (~> 7.1)
6
+ regexp_parser (~> 2.8)
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -45,7 +46,7 @@ GEM
45
46
  parser (3.2.2.4)
46
47
  ast (~> 2.4.1)
47
48
  racc
48
- racc (1.7.2)
49
+ racc (1.7.3)
49
50
  rainbow (3.1.1)
50
51
  rake (13.1.0)
51
52
  rbs (2.8.4)
data/build.ops CHANGED
@@ -7,17 +7,25 @@ imports:
7
7
  ...
8
8
 
9
9
  # when you run this script, it should do something like:
10
- # ~/sync/projects/kleene-rb
11
- # ❯ ops run build.ops version:1.0.0
12
- # Write version.rb for version 1.0.0
13
- # [localhost] Build gem: gem build opswalrus.gemspec
14
- # [localhost] Check whether Bitwarden is locked or not: bw status
15
- # [localhost] Get Rubygems OTP: bw get totp Rubygems
16
- # [localhost] Push gem: gem push opswalrus-1.0.0.gem
17
- # [localhost] Build docker image: docker build -t opswalrus/ops:1.0.0 .
18
-
19
- # ~/sync/projects/ops/opswalrus on  main via 💎 v3.2.2 took 44s
20
-
10
+ # ~/sync/projects/kleene-rb on  master via 💎 v3.2.2
11
+ # ❯ ops run build.ops version:0.5.0
12
+ # Write version.rb for version 0.5.0
13
+ # Writing template literal to ./lib/kleene/version.rb
14
+ # localhost | Build gem
15
+ # Finished in 0.181094124 seconds with exit status 0 (success)
16
+ # ********************************************************************************
17
+ # localhost | Commit Gemfile.lock and version.rb and git push changes
18
+ # Finished in 0.798496926 seconds with exit status 0 (success)
19
+ # ********************************************************************************
20
+ # localhost | Check whether Bitwarden is locked or not
21
+ # Finished in 0.008580059 seconds with exit status 0 (success)
22
+ # ********************************************************************************
23
+ # localhost | Get Rubygems OTP
24
+ # Finished in 0.378203313 seconds with exit status 0 (success)
25
+ # ********************************************************************************
26
+ # localhost | Push gem
27
+ # Finished in 4.095049625 seconds with exit status 0 (success)
28
+ # ********************************************************************************
21
29
 
22
30
  version = params.version
23
31
 
data/kleene.gemspec CHANGED
@@ -33,6 +33,7 @@ Gem::Specification.new do |spec|
33
33
 
34
34
  # Uncomment to register a new dependency of your gem
35
35
  spec.add_dependency "activesupport", "~> 7.1"
36
+ spec.add_dependency "regexp_parser", "~> 2.8"
36
37
 
37
38
  # For more information and examples about making a new gem, check out our
38
39
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -77,18 +77,10 @@ module Kleene
77
77
 
78
78
  nfa
79
79
  end
80
+ end
80
81
 
81
- def match_tracker(input) # : MatchTracker
82
- dfa = @composite_dfa.deep_clone
83
- match_tracker = setup_callbacks(dfa)
84
-
85
- input.each_char.with_index do |char, index|
86
- dfa.handle_token!(char, index)
87
- end
88
-
89
- match_tracker
90
- end
91
-
82
+ class BatchMultiMatchDFA < MultiMatchDFA
83
+ # #matches(input) is the batch-style matching interface
92
84
  def matches(input) # : Hash(NFA, Array(MatchRef))
93
85
  mt = match_tracker(input)
94
86
 
@@ -131,8 +123,19 @@ module Kleene
131
123
  mt.matches
132
124
  end
133
125
 
126
+ def match_tracker(input) # : BatchMatchTracker
127
+ dfa = @composite_dfa.deep_clone
128
+ match_tracker = setup_callbacks(dfa)
129
+
130
+ input.each_char.with_index do |char, index|
131
+ dfa.handle_token!(char, index)
132
+ end
133
+
134
+ match_tracker
135
+ end
136
+
134
137
  def setup_callbacks(dfa)
135
- match_tracker = MatchTracker.new
138
+ match_tracker = BatchMatchTracker.new
136
139
 
137
140
  # 1. identify DFA states that correspond to successful match of first character of the NFAs
138
141
  epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
@@ -222,10 +225,9 @@ module Kleene
222
225
 
223
226
  match_tracker
224
227
  end
225
-
226
228
  end
227
229
 
228
- class MatchTracker
230
+ class BatchMatchTracker
229
231
  # The NFA keys in the following two structures are not the original NFAs supplied to the MultiMatchDFA.
230
232
  # They are the original NFAs that have been augmented with a dead end error state, so the keys are objects that
231
233
  # are the internal state of a MultiMatchDFA
@@ -249,6 +251,10 @@ module Kleene
249
251
  attr_accessor :matches # : Hash(NFA, Array(MatchRef)) # NFA -> Array(MatchRef)
250
252
 
251
253
  def initialize
254
+ reset
255
+ end
256
+
257
+ def reset
252
258
  @candidate_match_start_positions = Hash.new
253
259
  @match_end_positions = Hash.new
254
260
  @empty_matches = Hash.new
@@ -0,0 +1,63 @@
1
+ require "set"
2
+ require "stringio"
3
+ require_relative "./kleene"
4
+
5
+ module Kleene
6
+ class NaiveOnlineRegex
7
+ def initialize(regexen, window_size = 100)
8
+ @regexen = regexen
9
+ @window_size = window_size
10
+
11
+ reset
12
+ end
13
+
14
+ def reset
15
+ @buffer = ""
16
+ @matches_per_regex = Hash.new # Hash(Regexp, Set(MatchData))
17
+ end
18
+
19
+ # #ingest(input) is the online-style matching interface
20
+ def ingest(input, debug = false) # : Set(OnlineMatch)
21
+ @buffer << input
22
+ new_online_matches = Set.new
23
+ @regexen.each do |regex|
24
+ existing_matches_for_regex = (@matches_per_regex[regex] ||= Set.new)
25
+ scan_matches = @buffer.scan_matches(regex).to_set
26
+ new_matches = scan_matches - existing_matches_for_regex # new_matches : Set(MatchData)
27
+ existing_matches_for_regex.merge(new_matches)
28
+ new_online_matches.merge(new_matches.map {|match_data| OnlineMatch.new(regex, match_data) })
29
+ end
30
+ resize_buffer!
31
+ new_online_matches
32
+ end
33
+
34
+ def matches # Hash(Regexp, Set(MatchData))
35
+ @matches_per_regex
36
+ end
37
+
38
+ def matches_for(regex) # Set(MatchData) | Nil
39
+ @matches_per_regex[regex]
40
+ end
41
+
42
+ def resize_buffer!
43
+ if @buffer.size > @window_size
44
+ @buffer = @buffer[-@window_size..-1]
45
+ end
46
+ end
47
+ end
48
+
49
+ # A {Regexp, MatchData} pair
50
+ class OnlineMatch
51
+ attr_reader :regex # Regexp
52
+ attr_reader :match # MatchData
53
+ def initialize(regex, match)
54
+ @regex, @match = regex, match
55
+ end
56
+ def to_a
57
+ @match.to_a
58
+ end
59
+ def to_h
60
+ {@regex => to_a}
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,323 @@
1
+ require "stringio"
2
+ require_relative "./kleene"
3
+
4
+ module Kleene
5
+ class MachineTuple
6
+ attr_accessor :nfa # : NFA
7
+ attr_accessor :nfa_with_dead_err # : NFA
8
+ attr_accessor :dfa # : DFA
9
+
10
+ def initialize(nfa, nfa_with_dead_err, dfa)
11
+ @nfa, @nfa_with_dead_err, @dfa = nfa, nfa_with_dead_err, dfa
12
+ end
13
+ end
14
+
15
+ class OnlineDFA
16
+ include DSL
17
+
18
+ # @original_nfas : Array(NFA)
19
+ attr_reader :nfas_with_err_state # : Array(NFA)
20
+ attr_accessor :dead_end_nfa_state_to_dead_end_nfa # : Hash(State, NFA)
21
+ attr_accessor :composite_nfa # : NFA
22
+ attr_accessor :composite_dfa # : DFA
23
+
24
+ attr_accessor :machines_by_index # : Hash(Int32, MachineTuple)
25
+ attr_accessor :nfa_to_index # : Hash(NFA, Int32)
26
+ attr_accessor :nfa_with_dead_err_to_index # : Hash(NFA, Int32)
27
+ attr_accessor :dfa_to_index # : Hash(DFA, Int32)
28
+
29
+ def initialize(nfas)
30
+ composite_alphabet = nfas.reduce(Set.new) {|memo, nfa| memo | nfa.alphabet }
31
+
32
+ @original_nfas = nfas
33
+ @nfas_with_err_state = nfas.map {|nfa| with_err_dead_end(nfa, composite_alphabet) } # copy NFAs and add dead-end error states to each of them
34
+ dfas = @original_nfas.map(&:to_dfa)
35
+
36
+ @nfa_to_index = @original_nfas.map.with_index {|nfa, index| [nfa, index] }.to_h
37
+ @nfa_with_dead_err_to_index = @nfas_with_err_state.map.with_index {|nfa, index| [nfa, index] }.to_h
38
+ @dfa_to_index = dfas.map.with_index {|dfa, index| [dfa, index] }.to_h
39
+ @machines_by_index = @original_nfas.zip(nfas_with_err_state, dfas).map.with_index {|tuple, index| nfa, nfa_with_dead_err, dfa = tuple; [index, MachineTuple.new(nfa, nfa_with_dead_err, dfa)] }.to_h
40
+
41
+ # build a mapping of (state -> nfa) pairs that capture which nfa owns each state
42
+ @dead_end_nfa_state_to_dead_end_nfa = Hash.new
43
+ @nfas_with_err_state.each do |nfa_with_dead_err|
44
+ nfa_with_dead_err.states.each do |state|
45
+ @dead_end_nfa_state_to_dead_end_nfa[state] = nfa_with_dead_err
46
+ end
47
+ end
48
+
49
+ # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state
50
+ @composite_nfa = create_composite_nfa(@nfas_with_err_state)
51
+ @composite_dfa = @composite_nfa.to_dfa
52
+
53
+ reset
54
+ end
55
+
56
+ def machines_from_nfa(nfa) # : MachineTuple
57
+ machines_by_index[nfa_to_index[nfa]]
58
+ end
59
+
60
+ def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple
61
+ machines_by_index[nfa_with_dead_err_to_index[nfa_with_dead_err]]
62
+ end
63
+
64
+ def machines_from_dfa(dfa) # : MachineTuple
65
+ machines_by_index[dfa_to_index[dfa]]
66
+ end
67
+
68
+ # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state
69
+ def create_composite_nfa(nfas)
70
+ nfa = union!(nfas)
71
+
72
+ # add epsilon transitions from all the states except the start state back to the start state
73
+ nfa.states.each do |state|
74
+ if state != nfa.start_state
75
+ nfa.add_transition(NFATransition::Epsilon, state, nfa.start_state)
76
+ end
77
+ end
78
+
79
+ nfa.update_final_states
80
+
81
+ nfa
82
+ end
83
+
84
+ def reset # : OnlineMatchTracker
85
+ @active_composite_dfa = @composite_dfa.deep_clone
86
+ @active_candidate_dfas = []
87
+ @match_tracker = setup_callbacks(@active_composite_dfa)
88
+ @buffer = ""
89
+ end
90
+
91
+ # #ingest(input) is the online-style matching interface
92
+ def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef))
93
+ mt = @match_tracker
94
+
95
+ start_index_of_input_fragment_in_buffer = @buffer.length
96
+
97
+ input.each_char.with_index do |char, index|
98
+ @active_composite_dfa.handle_token!(char, start_index_of_input_fragment_in_buffer + index)
99
+ end
100
+
101
+ @buffer << input
102
+
103
+ start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions
104
+
105
+ mt.empty_matches.each do |nfa_with_dead_err, indices|
106
+ original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa
107
+ indices.select {|index| index >= start_index_of_input_fragment_in_buffer }.each do |index|
108
+ mt.add_match(original_nfa, MatchRef.new(@buffer, index...index))
109
+ end
110
+ end
111
+
112
+ input.each_char.with_index do |char, index|
113
+ index_in_buffer = start_index_of_input_fragment_in_buffer + index
114
+
115
+ @active_candidate_dfas.reject! do |active_dfa_tuple|
116
+ dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple
117
+
118
+ dfa_clone.handle_token!(char, index_in_buffer)
119
+ mt.add_match(original_nfa, MatchRef.new(@buffer, start_of_match_index..index_in_buffer)) if dfa_clone.accept?
120
+
121
+ dfa_clone.error?
122
+ end
123
+
124
+ if nfas_with_dead_err = start_index_to_nfas_that_may_match[index_in_buffer]
125
+ nfas_with_dead_err.each do |nfa_with_dead_err|
126
+ machines = machines_from_nfa_with_dead_err(nfa_with_dead_err)
127
+ original_nfa = machines.nfa
128
+ dfa = machines.dfa
129
+ dfa_clone = dfa.shallow_clone
130
+
131
+ dfa_clone.handle_token!(char, index_in_buffer)
132
+ mt.add_match(original_nfa, MatchRef.new(@buffer, index_in_buffer..index_in_buffer)) if dfa_clone.accept?
133
+
134
+ @active_candidate_dfas << [dfa_clone, original_nfa, index_in_buffer] unless dfa_clone.error?
135
+ end
136
+ end
137
+ end
138
+
139
+ matches
140
+ end
141
+
142
+ def matches
143
+ @match_tracker.matches
144
+ end
145
+
146
+ def setup_callbacks(dfa)
147
+ match_tracker = OnlineMatchTracker.new
148
+
149
+ # 1. identify DFA states that correspond to successful match of first character of the NFAs
150
+ epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
151
+ nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state).
152
+ reject {|transition| transition.epsilon? || transition.to.error? }.
153
+ map(&:to).to_set
154
+ dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.
155
+ compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
156
+ reduce(Set.new) {|memo, state_set| memo | state_set }
157
+ dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new
158
+ dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state|
159
+ dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
160
+ select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }.
161
+ compact_map do |nfa_state|
162
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
163
+ end.to_set
164
+ end
165
+
166
+ # 2. identify DFA states that correspond to final states in the NFAs
167
+ nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set }
168
+ dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
169
+ reduce(Set.new) {|memo, state_set| memo | state_set }
170
+ dead_end_nfas_that_have_transitioned_to_final_state = Hash.new
171
+ dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state|
172
+ dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
173
+ select {|nfa_state| nfa_final_states.includes?(nfa_state) }.
174
+ compact_map do |nfa_state|
175
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
176
+ end.to_set
177
+ end
178
+
179
+ # 3. Identify DFA states that correspond to successful match without even having seen any characters.
180
+ # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions.
181
+ nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set
182
+ dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.
183
+ compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
184
+ reduce(Set.new) {|memo, state_set| memo | state_set }
185
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new
186
+ dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state|
187
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
188
+ select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }.
189
+ compact_map do |nfa_state|
190
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
191
+ end.to_set
192
+ end
193
+
194
+ # set up transition callbacks, since the callbacks may only be defined once per state and transition
195
+ # For (1):
196
+ # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun
197
+ # to be matched on the transition to one of the states in (1)
198
+ # For (2):
199
+ # set up transition callbacks to push the index position of the end of a successful match onto the list
200
+ # of successful matches for the NFA that matched
201
+ # For (3):
202
+ # set up transition callbacks to capture successful empty matches
203
+ destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states
204
+ destination_dfa_states_for_callbacks.each do |dfa_state|
205
+ dfa.on_transition_to(dfa_state) do |transition, token, token_index|
206
+ destination_dfa_state = transition.to
207
+
208
+ should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state)
209
+ should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state)
210
+ should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state)
211
+
212
+ if should_track_empty_match
213
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end|
214
+ match_tracker.add_empty_match(nfa_with_dead_end, token_index)
215
+ end
216
+ end
217
+
218
+ if should_track_start_of_candidate_match
219
+ nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new
220
+ nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new
221
+ dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match
222
+ dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end|
223
+ match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index)
224
+ end
225
+ end
226
+
227
+ if should_track_end_of_match
228
+ dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end|
229
+ match_tracker.add_end_of_match(nfa_with_dead_end, token_index)
230
+ end
231
+ end
232
+ end
233
+ end
234
+
235
+ match_tracker
236
+ end
237
+ end
238
+
239
+ class OnlineMatchTracker
240
+ # The NFA keys in the following two structures are not the original NFAs supplied to the OnlineDFA.
241
+ # They are the original NFAs that have been augmented with a dead end error state, so the keys are objects that
242
+ # are the internal state of an OnlineDFA
243
+ attr_accessor :candidate_match_start_positions # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfStartOfMatch)
244
+ # The end positions are indices at which, after handling the character, the DFA was observed to be in a match/accept state;
245
+ # however, the interpretation is ambiguous, because the accepting state may be as a result of (1) transitioning to an error state that is also marked final/accepting,
246
+ # OR it may be as a result of transitioning to (2) a non-error final state.
247
+ # In the case of (1), the match may be an empty match, where after transitioning to an error state, the DFA is in a state that
248
+ # is equivalent to the error state and start state and final state (e.g. as in an optional or kleene star DFA),
249
+ # while in the case of (2), the match may be a "normal" match.
250
+ # The ambiguity is problematic because it isn't clear whether the index position of the match is the inclusive end of a match
251
+ # or the beginning of an empty match.
252
+ # This ambiguity is all due to the construction of the composite DFA in the OnlineDFA - the dead end error states are epsilon-transitioned
253
+ # to the composite DFA's start state.
254
+ attr_accessor :match_end_positions # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfEndOfMatch)
255
+ attr_accessor :empty_matches # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfEmptyMatch)
256
+
257
+ # The NFA keys in the following structure are the original NFAs supplied to the OnlineDFA.
258
+ # This is in contrast to the augmented NFAs that are used as keys in the candidate_match_start_positions and
259
+ # match_end_positions structures, documented above ^^^.
260
+ attr_accessor :matches # : Hash(NFA, Array(MatchRef)) # NFA -> Array(MatchRef)
261
+
262
+ def initialize
263
+ reset
264
+ end
265
+
266
+ def reset
267
+ @candidate_match_start_positions = Hash.new
268
+ @match_end_positions = Hash.new
269
+ @empty_matches = Hash.new
270
+ @matches = Hash.new
271
+ end
272
+
273
+ def start_positions(nfa)
274
+ candidate_match_start_positions[nfa] ||= Array.new
275
+ end
276
+
277
+ def end_positions(nfa)
278
+ match_end_positions[nfa] ||= Array.new
279
+ end
280
+
281
+ def empty_match_positions(nfa)
282
+ empty_matches[nfa] ||= Array.new
283
+ end
284
+
285
+ def matches_for(nfa)
286
+ matches[nfa] ||= Array.new
287
+ end
288
+
289
+ def add_start_of_candidate_match(nfa_with_dead_end, token_index)
290
+ # puts "add_start_of_candidate_match(#{nfa.object_id}, #{token_index})"
291
+ positions = start_positions(nfa_with_dead_end)
292
+ positions << token_index
293
+ end
294
+
295
+ # the end positions are inclusive of the index of the last character matched, so empty matches are not accounted for in the match_end_positions array
296
+ def add_end_of_match(nfa_with_dead_end, token_index)
297
+ # puts "add_end_of_match(#{nfa.object_id}, #{token_index})"
298
+ positions = end_positions(nfa_with_dead_end)
299
+ positions << token_index
300
+ end
301
+
302
+ def add_empty_match(nfa_with_dead_end, token_index)
303
+ positions = empty_match_positions(nfa_with_dead_end)
304
+ positions << token_index
305
+ end
306
+
307
+ def invert_candidate_match_start_positions # : Hash(Int32, Array(NFA))
308
+ index_to_nfas = Hash.new
309
+ candidate_match_start_positions.each do |nfa_with_dead_end, indices|
310
+ indices.each do |index|
311
+ nfas = index_to_nfas[index] ||= Array.new
312
+ nfas << nfa_with_dead_end
313
+ end
314
+ end
315
+ index_to_nfas
316
+ end
317
+
318
+ def add_match(nfa, match)
319
+ matches = matches_for(nfa)
320
+ matches << match
321
+ end
322
+ end
323
+ end
@@ -0,0 +1,9 @@
1
+
2
+ module Kleene
3
+ class Parser
4
+ def parse(pattern)
5
+ ast = Regexp::Parser.parse(pattern)
6
+ ast
7
+ end
8
+ end
9
+ end
@@ -21,3 +21,9 @@ module Enumerable
21
21
 
22
22
  alias_method :includes?, :include?
23
23
  end
24
+
25
+ class String
26
+ def scan_matches(pattern) # : Array(MatchData)
27
+ to_enum(:scan, pattern).map { Regexp.last_match }
28
+ end
29
+ end
@@ -1,3 +1,3 @@
1
1
  module Kleene
2
- VERSION = "0.5.0"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/kleene.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "active_support"
4
4
  require "active_support/core_ext"
5
+ require "regexp_parser"
5
6
  require_relative "kleene/version"
6
7
  require_relative "kleene/patches"
7
8
  require_relative "kleene/kleene"
@@ -9,7 +10,9 @@ require_relative "kleene/dsl"
9
10
  require_relative "kleene/nfa"
10
11
  require_relative "kleene/dfa"
11
12
  require_relative "kleene/multi_match_dfa"
12
-
13
+ require_relative "kleene/online_dfa"
14
+ require_relative "kleene/naive_online_regex"
15
+ require_relative "kleene/parser"
13
16
 
14
17
  module Kleene
15
18
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kleene
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Ellis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-04 00:00:00.000000000 Z
11
+ date: 2023-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '7.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: regexp_parser
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.8'
27
41
  description: kleene is a library for building regular expression recognition automata
28
42
  - nfas, dfas, and some specialty structures.
29
43
  email:
@@ -45,7 +59,10 @@ files:
45
59
  - lib/kleene/dsl.rb
46
60
  - lib/kleene/kleene.rb
47
61
  - lib/kleene/multi_match_dfa.rb
62
+ - lib/kleene/naive_online_regex.rb
48
63
  - lib/kleene/nfa.rb
64
+ - lib/kleene/online_dfa.rb
65
+ - lib/kleene/parser.rb
49
66
  - lib/kleene/patches.rb
50
67
  - lib/kleene/version.rb
51
68
  homepage: https://github.com/davidkellis/kleene-rb