kleene 0.5.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7df94943f1746064025af64823d93819d96d04d5f6526789451dcdac643f54df
4
- data.tar.gz: 6590239e59b38ed2155f4329070ee763904dabc7535205bc5f1e23b780be48a0
3
+ metadata.gz: 674bbda22ddfbc6c4ec1624de621b96c24576cbe8aa656d228697e1d98549cdb
4
+ data.tar.gz: ddca6b95201b21359dd23c5d6b4d9591561e9045764ce90b470b6add1c4518b8
5
5
  SHA512:
6
- metadata.gz: 276cec78e2550ff2d7b95f96d3e98c55b6a43ace55d14300ae1406cd292dd5b709eb4074f1c2cac4ac806125a8296172aaa57ee37cece7a4445cb0069629c5ea
7
- data.tar.gz: 961ac38a53dbbf716d4cc34c07f26eb3cd36bb9dd0bcd65485f3c2d9cc0b667eb3d297a8d91e19507e6b7ec59180c92fca4574c10df57a4588e337f9012f8e19
6
+ metadata.gz: 9392d0b56aa48b8cef4f0337be625d6160d49c3fb0214b034f379a505ee3a142347e8bb5e62a82ad0cb982bb3ca00ad6f9b12f951e00cf48b0447a1b6fb78320
7
+ data.tar.gz: 1521365696f470bc249dac8aa77038bc9121ff60499dc9368aaf86224e64af14a06e092aef191bc9571d62d2dd63567a7051f4b07490e4f377eac8d24de83025
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kleene (0.1.0)
4
+ kleene (0.6.0)
5
5
  activesupport (~> 7.1)
6
+ regexp_parser (~> 2.8)
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -45,7 +46,7 @@ GEM
45
46
  parser (3.2.2.4)
46
47
  ast (~> 2.4.1)
47
48
  racc
48
- racc (1.7.2)
49
+ racc (1.7.3)
49
50
  rainbow (3.1.1)
50
51
  rake (13.1.0)
51
52
  rbs (2.8.4)
data/build.ops CHANGED
@@ -7,17 +7,25 @@ imports:
7
7
  ...
8
8
 
9
9
  # when you run this script, it should do something like:
10
- # ~/sync/projects/kleene-rb
11
- # ❯ ops run build.ops version:1.0.0
12
- # Write version.rb for version 1.0.0
13
- # [localhost] Build gem: gem build opswalrus.gemspec
14
- # [localhost] Check whether Bitwarden is locked or not: bw status
15
- # [localhost] Get Rubygems OTP: bw get totp Rubygems
16
- # [localhost] Push gem: gem push opswalrus-1.0.0.gem
17
- # [localhost] Build docker image: docker build -t opswalrus/ops:1.0.0 .
18
-
19
- # ~/sync/projects/ops/opswalrus on  main via 💎 v3.2.2 took 44s
20
-
10
+ # ~/sync/projects/kleene-rb on  master via 💎 v3.2.2
11
+ # ❯ ops run build.ops version:0.5.0
12
+ # Write version.rb for version 0.5.0
13
+ # Writing template literal to ./lib/kleene/version.rb
14
+ # localhost | Build gem
15
+ # Finished in 0.181094124 seconds with exit status 0 (success)
16
+ # ********************************************************************************
17
+ # localhost | Commit Gemfile.lock and version.rb and git push changes
18
+ # Finished in 0.798496926 seconds with exit status 0 (success)
19
+ # ********************************************************************************
20
+ # localhost | Check whether Bitwarden is locked or not
21
+ # Finished in 0.008580059 seconds with exit status 0 (success)
22
+ # ********************************************************************************
23
+ # localhost | Get Rubygems OTP
24
+ # Finished in 0.378203313 seconds with exit status 0 (success)
25
+ # ********************************************************************************
26
+ # localhost | Push gem
27
+ # Finished in 4.095049625 seconds with exit status 0 (success)
28
+ # ********************************************************************************
21
29
 
22
30
  version = params.version
23
31
 
data/kleene.gemspec CHANGED
@@ -33,6 +33,7 @@ Gem::Specification.new do |spec|
33
33
 
34
34
  # Uncomment to register a new dependency of your gem
35
35
  spec.add_dependency "activesupport", "~> 7.1"
36
+ spec.add_dependency "regexp_parser", "~> 2.8"
36
37
 
37
38
  # For more information and examples about making a new gem, check out our
38
39
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -77,18 +77,10 @@ module Kleene
77
77
 
78
78
  nfa
79
79
  end
80
+ end
80
81
 
81
- def match_tracker(input) # : MatchTracker
82
- dfa = @composite_dfa.deep_clone
83
- match_tracker = setup_callbacks(dfa)
84
-
85
- input.each_char.with_index do |char, index|
86
- dfa.handle_token!(char, index)
87
- end
88
-
89
- match_tracker
90
- end
91
-
82
+ class BatchMultiMatchDFA < MultiMatchDFA
83
+ # #matches(input) is the batch-style matching interface
92
84
  def matches(input) # : Hash(NFA, Array(MatchRef))
93
85
  mt = match_tracker(input)
94
86
 
@@ -131,8 +123,19 @@ module Kleene
131
123
  mt.matches
132
124
  end
133
125
 
126
+ def match_tracker(input) # : BatchMatchTracker
127
+ dfa = @composite_dfa.deep_clone
128
+ match_tracker = setup_callbacks(dfa)
129
+
130
+ input.each_char.with_index do |char, index|
131
+ dfa.handle_token!(char, index)
132
+ end
133
+
134
+ match_tracker
135
+ end
136
+
134
137
  def setup_callbacks(dfa)
135
- match_tracker = MatchTracker.new
138
+ match_tracker = BatchMatchTracker.new
136
139
 
137
140
  # 1. identify DFA states that correspond to successful match of first character of the NFAs
138
141
  epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
@@ -222,10 +225,9 @@ module Kleene
222
225
 
223
226
  match_tracker
224
227
  end
225
-
226
228
  end
227
229
 
228
- class MatchTracker
230
+ class BatchMatchTracker
229
231
  # The NFA keys in the following two structures are not the original NFAs supplied to the MultiMatchDFA.
230
232
  # They are the original NFAs that have been augmented with a dead end error state, so the keys are objects that
231
233
  # are the internal state of a MultiMatchDFA
@@ -249,6 +251,10 @@ module Kleene
249
251
  attr_accessor :matches # : Hash(NFA, Array(MatchRef)) # NFA -> Array(MatchRef)
250
252
 
251
253
  def initialize
254
+ reset
255
+ end
256
+
257
+ def reset
252
258
  @candidate_match_start_positions = Hash.new
253
259
  @match_end_positions = Hash.new
254
260
  @empty_matches = Hash.new
@@ -0,0 +1,63 @@
1
+ require "set"
2
+ require "stringio"
3
+ require_relative "./kleene"
4
+
5
+ module Kleene
6
+ class NaiveOnlineRegex
7
+ def initialize(regexen, window_size = 100)
8
+ @regexen = regexen
9
+ @window_size = window_size
10
+
11
+ reset
12
+ end
13
+
14
+ def reset
15
+ @buffer = ""
16
+ @matches_per_regex = Hash.new # Hash(Regexp, Set(MatchData))
17
+ end
18
+
19
+ # #ingest(input) is the online-style matching interface
20
+ def ingest(input, debug = false) # : Set(OnlineMatch)
21
+ @buffer << input
22
+ new_online_matches = Set.new
23
+ @regexen.each do |regex|
24
+ existing_matches_for_regex = (@matches_per_regex[regex] ||= Set.new)
25
+ scan_matches = @buffer.scan_matches(regex).to_set
26
+ new_matches = scan_matches - existing_matches_for_regex # new_matches : Set(MatchData)
27
+ existing_matches_for_regex.merge(new_matches)
28
+ new_online_matches.merge(new_matches.map {|match_data| OnlineMatch.new(regex, match_data) })
29
+ end
30
+ resize_buffer!
31
+ new_online_matches
32
+ end
33
+
34
+ def matches # Hash(Regexp, Set(MatchData))
35
+ @matches_per_regex
36
+ end
37
+
38
+ def matches_for(regex) # Set(MatchData) | Nil
39
+ @matches_per_regex[regex]
40
+ end
41
+
42
+ def resize_buffer!
43
+ if @buffer.size > @window_size
44
+ @buffer = @buffer[-@window_size..-1]
45
+ end
46
+ end
47
+ end
48
+
49
+ # A {Regexp, MatchData} pair
50
+ class OnlineMatch
51
+ attr_reader :regex # Regexp
52
+ attr_reader :match # MatchData
53
+ def initialize(regex, match)
54
+ @regex, @match = regex, match
55
+ end
56
+ def to_a
57
+ @match.to_a
58
+ end
59
+ def to_h
60
+ {@regex => to_a}
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,323 @@
1
+ require "stringio"
2
+ require_relative "./kleene"
3
+
4
+ module Kleene
5
+ class MachineTuple
6
+ attr_accessor :nfa # : NFA
7
+ attr_accessor :nfa_with_dead_err # : NFA
8
+ attr_accessor :dfa # : DFA
9
+
10
+ def initialize(nfa, nfa_with_dead_err, dfa)
11
+ @nfa, @nfa_with_dead_err, @dfa = nfa, nfa_with_dead_err, dfa
12
+ end
13
+ end
14
+
15
+ class OnlineDFA
16
+ include DSL
17
+
18
+ # @original_nfas : Array(NFA)
19
+ attr_reader :nfas_with_err_state # : Array(NFA)
20
+ attr_accessor :dead_end_nfa_state_to_dead_end_nfa # : Hash(State, NFA)
21
+ attr_accessor :composite_nfa # : NFA
22
+ attr_accessor :composite_dfa # : DFA
23
+
24
+ attr_accessor :machines_by_index # : Hash(Int32, MachineTuple)
25
+ attr_accessor :nfa_to_index # : Hash(NFA, Int32)
26
+ attr_accessor :nfa_with_dead_err_to_index # : Hash(NFA, Int32)
27
+ attr_accessor :dfa_to_index # : Hash(DFA, Int32)
28
+
29
+ def initialize(nfas)
30
+ composite_alphabet = nfas.reduce(Set.new) {|memo, nfa| memo | nfa.alphabet }
31
+
32
+ @original_nfas = nfas
33
+ @nfas_with_err_state = nfas.map {|nfa| with_err_dead_end(nfa, composite_alphabet) } # copy NFAs and add dead-end error states to each of them
34
+ dfas = @original_nfas.map(&:to_dfa)
35
+
36
+ @nfa_to_index = @original_nfas.map.with_index {|nfa, index| [nfa, index] }.to_h
37
+ @nfa_with_dead_err_to_index = @nfas_with_err_state.map.with_index {|nfa, index| [nfa, index] }.to_h
38
+ @dfa_to_index = dfas.map.with_index {|dfa, index| [dfa, index] }.to_h
39
+ @machines_by_index = @original_nfas.zip(nfas_with_err_state, dfas).map.with_index {|tuple, index| nfa, nfa_with_dead_err, dfa = tuple; [index, MachineTuple.new(nfa, nfa_with_dead_err, dfa)] }.to_h
40
+
41
+ # build a mapping of (state -> nfa) pairs that capture which nfa owns each state
42
+ @dead_end_nfa_state_to_dead_end_nfa = Hash.new
43
+ @nfas_with_err_state.each do |nfa_with_dead_err|
44
+ nfa_with_dead_err.states.each do |state|
45
+ @dead_end_nfa_state_to_dead_end_nfa[state] = nfa_with_dead_err
46
+ end
47
+ end
48
+
49
+ # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state
50
+ @composite_nfa = create_composite_nfa(@nfas_with_err_state)
51
+ @composite_dfa = @composite_nfa.to_dfa
52
+
53
+ reset
54
+ end
55
+
56
+ def machines_from_nfa(nfa) # : MachineTuple
57
+ machines_by_index[nfa_to_index[nfa]]
58
+ end
59
+
60
+ def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple
61
+ machines_by_index[nfa_with_dead_err_to_index[nfa_with_dead_err]]
62
+ end
63
+
64
+ def machines_from_dfa(dfa) # : MachineTuple
65
+ machines_by_index[dfa_to_index[dfa]]
66
+ end
67
+
68
+ # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state
69
+ def create_composite_nfa(nfas)
70
+ nfa = union!(nfas)
71
+
72
+ # add epsilon transitions from all the states except the start state back to the start state
73
+ nfa.states.each do |state|
74
+ if state != nfa.start_state
75
+ nfa.add_transition(NFATransition::Epsilon, state, nfa.start_state)
76
+ end
77
+ end
78
+
79
+ nfa.update_final_states
80
+
81
+ nfa
82
+ end
83
+
84
+ def reset # : OnlineMatchTracker
85
+ @active_composite_dfa = @composite_dfa.deep_clone
86
+ @active_candidate_dfas = []
87
+ @match_tracker = setup_callbacks(@active_composite_dfa)
88
+ @buffer = ""
89
+ end
90
+
91
+ # #ingest(input) is the online-style matching interface
92
+ def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef))
93
+ mt = @match_tracker
94
+
95
+ start_index_of_input_fragment_in_buffer = @buffer.length
96
+
97
+ input.each_char.with_index do |char, index|
98
+ @active_composite_dfa.handle_token!(char, start_index_of_input_fragment_in_buffer + index)
99
+ end
100
+
101
+ @buffer << input
102
+
103
+ start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions
104
+
105
+ mt.empty_matches.each do |nfa_with_dead_err, indices|
106
+ original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa
107
+ indices.select {|index| index >= start_index_of_input_fragment_in_buffer }.each do |index|
108
+ mt.add_match(original_nfa, MatchRef.new(@buffer, index...index))
109
+ end
110
+ end
111
+
112
+ input.each_char.with_index do |char, index|
113
+ index_in_buffer = start_index_of_input_fragment_in_buffer + index
114
+
115
+ @active_candidate_dfas.reject! do |active_dfa_tuple|
116
+ dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple
117
+
118
+ dfa_clone.handle_token!(char, index_in_buffer)
119
+ mt.add_match(original_nfa, MatchRef.new(@buffer, start_of_match_index..index_in_buffer)) if dfa_clone.accept?
120
+
121
+ dfa_clone.error?
122
+ end
123
+
124
+ if nfas_with_dead_err = start_index_to_nfas_that_may_match[index_in_buffer]
125
+ nfas_with_dead_err.each do |nfa_with_dead_err|
126
+ machines = machines_from_nfa_with_dead_err(nfa_with_dead_err)
127
+ original_nfa = machines.nfa
128
+ dfa = machines.dfa
129
+ dfa_clone = dfa.shallow_clone
130
+
131
+ dfa_clone.handle_token!(char, index_in_buffer)
132
+ mt.add_match(original_nfa, MatchRef.new(@buffer, index_in_buffer..index_in_buffer)) if dfa_clone.accept?
133
+
134
+ @active_candidate_dfas << [dfa_clone, original_nfa, index_in_buffer] unless dfa_clone.error?
135
+ end
136
+ end
137
+ end
138
+
139
+ matches
140
+ end
141
+
142
+ def matches
143
+ @match_tracker.matches
144
+ end
145
+
146
+ def setup_callbacks(dfa)
147
+ match_tracker = OnlineMatchTracker.new
148
+
149
+ # 1. identify DFA states that correspond to successful match of first character of the NFAs
150
+ epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
151
+ nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state).
152
+ reject {|transition| transition.epsilon? || transition.to.error? }.
153
+ map(&:to).to_set
154
+ dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.
155
+ compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
156
+ reduce(Set.new) {|memo, state_set| memo | state_set }
157
+ dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new
158
+ dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state|
159
+ dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
160
+ select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }.
161
+ compact_map do |nfa_state|
162
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
163
+ end.to_set
164
+ end
165
+
166
+ # 2. identify DFA states that correspond to final states in the NFAs
167
+ nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set }
168
+ dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
169
+ reduce(Set.new) {|memo, state_set| memo | state_set }
170
+ dead_end_nfas_that_have_transitioned_to_final_state = Hash.new
171
+ dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state|
172
+ dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
173
+ select {|nfa_state| nfa_final_states.includes?(nfa_state) }.
174
+ compact_map do |nfa_state|
175
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
176
+ end.to_set
177
+ end
178
+
179
+ # 3. Identify DFA states that correspond to successful match without even having seen any characters.
180
+ # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions.
181
+ nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set
182
+ dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.
183
+ compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
184
+ reduce(Set.new) {|memo, state_set| memo | state_set }
185
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new
186
+ dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state|
187
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
188
+ select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }.
189
+ compact_map do |nfa_state|
190
+ dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
191
+ end.to_set
192
+ end
193
+
194
+ # set up transition callbacks, since the callbacks may only be defined once per state and transition
195
+ # For (1):
196
+ # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun
197
+ # to be matched on the transition to one of the states in (1)
198
+ # For (2):
199
+ # set up transition callbacks to push the index position of the end of a successful match onto the list
200
+ # of successful matches for the NFA that matched
201
+ # For (3):
202
+ # set up transition callbacks to capture successful empty matches
203
+ destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states
204
+ destination_dfa_states_for_callbacks.each do |dfa_state|
205
+ dfa.on_transition_to(dfa_state) do |transition, token, token_index|
206
+ destination_dfa_state = transition.to
207
+
208
+ should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state)
209
+ should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state)
210
+ should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state)
211
+
212
+ if should_track_empty_match
213
+ dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end|
214
+ match_tracker.add_empty_match(nfa_with_dead_end, token_index)
215
+ end
216
+ end
217
+
218
+ if should_track_start_of_candidate_match
219
+ nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new
220
+ nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new
221
+ dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match
222
+ dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end|
223
+ match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index)
224
+ end
225
+ end
226
+
227
+ if should_track_end_of_match
228
+ dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end|
229
+ match_tracker.add_end_of_match(nfa_with_dead_end, token_index)
230
+ end
231
+ end
232
+ end
233
+ end
234
+
235
+ match_tracker
236
+ end
237
+ end
238
+
239
+ class OnlineMatchTracker
240
+ # The NFA keys in the following two structures are not the original NFAs supplied to the MultiMatchDFA.
241
+ # They are the original NFAs that have been augmented with a dead end error state, so the keys are objects that
242
+ # are the internal state of a MultiMatchDFA
243
+ attr_accessor :candidate_match_start_positions # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfStartOfMatch)
244
+ # The end positions are indices at which, after handling the character, the DFA was observed to be in a match/accept state;
245
+ # however, the interpretation is ambiguous, because the accepting state may be as a result of (1) transitioning to an error state that is also marked final/accepting,
246
+ # OR it may be as a result of transitioning to (2) a non-error final state.
247
+ # In the case of (1), the match may be an empty match, where after transitioning to an error state, the DFA is in a state that
248
+ # is equivalent to the error state and start state and final state (e.g. as in an optional or kleene star DFA),
249
+ # while in the case of (2), the match may be a "normal" match.
250
+ # The ambiguity is problematic because it isn't clear whether the index position of the match is the inclusive end of a match
251
+ # or the beginning of an empty match.
252
+ # This ambiguity is all due to the construction of the composite DFA in the MultiMatchDFA - the dead end error states are epsilon-transitioned
253
+ # to the composite DFA's start state.
254
+ attr_accessor :match_end_positions # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfEndOfMatch)
255
+ attr_accessor :empty_matches # : Hash(NFA, Array(Int32)) # NFA -> Array(IndexPositionOfEmptyMatch)
256
+
257
+ # The NFA keys in the following structure are the original NFAs supplied to the MultiMatchDFA.
258
+ # This is in contrast to the augmented NFAs that are used as keys in the candidate_match_start_positions and
259
+ # match_end_positions structures, documented above ^^^.
260
+ attr_accessor :matches # : Hash(NFA, Array(MatchRef)) # NFA -> Array(MatchRef)
261
+
262
+ def initialize
263
+ reset
264
+ end
265
+
266
+ def reset
267
+ @candidate_match_start_positions = Hash.new
268
+ @match_end_positions = Hash.new
269
+ @empty_matches = Hash.new
270
+ @matches = Hash.new
271
+ end
272
+
273
+ def start_positions(nfa)
274
+ candidate_match_start_positions[nfa] ||= Array.new
275
+ end
276
+
277
+ def end_positions(nfa)
278
+ match_end_positions[nfa] ||= Array.new
279
+ end
280
+
281
+ def empty_match_positions(nfa)
282
+ empty_matches[nfa] ||= Array.new
283
+ end
284
+
285
+ def matches_for(nfa)
286
+ matches[nfa] ||= Array.new
287
+ end
288
+
289
+ def add_start_of_candidate_match(nfa_with_dead_end, token_index)
290
+ # puts "add_start_of_candidate_match(#{nfa_with_dead_end.object_id}, #{token_index})"
291
+ positions = start_positions(nfa_with_dead_end)
292
+ positions << token_index
293
+ end
294
+
295
+ # the end positions are inclusive of the index of the last character matched, so empty matches are not accounted for in the match_end_positions array
296
+ def add_end_of_match(nfa_with_dead_end, token_index)
297
+ # puts "add_end_of_match(#{nfa_with_dead_end.object_id}, #{token_index})"
298
+ positions = end_positions(nfa_with_dead_end)
299
+ positions << token_index
300
+ end
301
+
302
+ def add_empty_match(nfa_with_dead_end, token_index)
303
+ positions = empty_match_positions(nfa_with_dead_end)
304
+ positions << token_index
305
+ end
306
+
307
+ def invert_candidate_match_start_positions # : Hash(Int32, Array(NFA))
308
+ index_to_nfas = Hash.new
309
+ candidate_match_start_positions.each do |nfa_with_dead_end, indices|
310
+ indices.each do |index|
311
+ nfas = index_to_nfas[index] ||= Array.new
312
+ nfas << nfa_with_dead_end
313
+ end
314
+ end
315
+ index_to_nfas
316
+ end
317
+
318
+ def add_match(nfa, match)
319
+ matches = matches_for(nfa)
320
+ matches << match
321
+ end
322
+ end
323
+ end
@@ -0,0 +1,9 @@
1
+
2
+ module Kleene
3
+ class Parser
4
+ def parse(pattern)
5
+ ast = Regexp::Parser.parse(pattern)
6
+ ast
7
+ end
8
+ end
9
+ end
@@ -21,3 +21,9 @@ module Enumerable
21
21
 
22
22
  alias_method :includes?, :include?
23
23
  end
24
+
25
+ class String
26
+ def scan_matches(pattern) # : Array(MatchData)
27
+ to_enum(:scan, pattern).map { Regexp.last_match }
28
+ end
29
+ end
@@ -1,3 +1,3 @@
1
1
  module Kleene
2
- VERSION = "0.5.0"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/kleene.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "active_support"
4
4
  require "active_support/core_ext"
5
+ require "regexp_parser"
5
6
  require_relative "kleene/version"
6
7
  require_relative "kleene/patches"
7
8
  require_relative "kleene/kleene"
@@ -9,7 +10,9 @@ require_relative "kleene/dsl"
9
10
  require_relative "kleene/nfa"
10
11
  require_relative "kleene/dfa"
11
12
  require_relative "kleene/multi_match_dfa"
12
-
13
+ require_relative "kleene/online_dfa"
14
+ require_relative "kleene/naive_online_regex"
15
+ require_relative "kleene/parser"
13
16
 
14
17
  module Kleene
15
18
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kleene
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Ellis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-11-04 00:00:00.000000000 Z
11
+ date: 2023-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '7.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: regexp_parser
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.8'
27
41
  description: kleene is a library for building regular expression recognition automata
28
42
  - nfas, dfas, and some specialty structures.
29
43
  email:
@@ -45,7 +59,10 @@ files:
45
59
  - lib/kleene/dsl.rb
46
60
  - lib/kleene/kleene.rb
47
61
  - lib/kleene/multi_match_dfa.rb
62
+ - lib/kleene/naive_online_regex.rb
48
63
  - lib/kleene/nfa.rb
64
+ - lib/kleene/online_dfa.rb
65
+ - lib/kleene/parser.rb
49
66
  - lib/kleene/patches.rb
50
67
  - lib/kleene/version.rb
51
68
  homepage: https://github.com/davidkellis/kleene-rb