hoozuki 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +18 -5
- data/Rakefile +12 -2
- data/lib/hoozuki/automaton/dfa/builder.rb +79 -0
- data/lib/hoozuki/automaton/dfa.rb +4 -41
- data/lib/hoozuki/automaton/nfa.rb +29 -108
- data/lib/hoozuki/automaton/state_id.rb +2 -1
- data/lib/hoozuki/instruction/char.rb +13 -0
- data/lib/hoozuki/instruction/jmp.rb +13 -0
- data/lib/hoozuki/instruction/match.rb +8 -0
- data/lib/hoozuki/instruction/split.rb +14 -0
- data/lib/hoozuki/instruction.rb +6 -0
- data/lib/hoozuki/node/choice.rb +13 -1
- data/lib/hoozuki/node/concatenation.rb +16 -1
- data/lib/hoozuki/node/epsilon.rb +8 -1
- data/lib/hoozuki/node/literal.rb +9 -1
- data/lib/hoozuki/node/repetition.rb +55 -1
- data/lib/hoozuki/parser.rb +888 -76
- data/lib/hoozuki/parser.y +128 -0
- data/lib/hoozuki/version.rb +2 -2
- data/lib/hoozuki/vm/compiler.rb +129 -0
- data/lib/hoozuki/vm/evaluator.rb +39 -0
- data/lib/hoozuki/vm.rb +4 -0
- data/lib/hoozuki.rb +22 -15
- data/spec/hoozuki/automaton/dfa/builder_spec.rb +79 -0
- data/spec/hoozuki/automaton/dfa_spec.rb +149 -0
- data/spec/hoozuki/automaton/nfa_spec.rb +168 -0
- data/spec/hoozuki/instruction_spec.rb +88 -0
- data/spec/hoozuki/node_spec.rb +110 -0
- data/spec/hoozuki/parser_spec.rb +168 -0
- data/spec/hoozuki/vm/compiler_spec.rb +219 -0
- data/spec/hoozuki/vm/evaluator_spec.rb +260 -0
- data/spec/hoozuki_spec.rb +186 -2
- metadata +20 -2
data/spec/hoozuki_spec.rb
CHANGED
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
RSpec.describe Hoozuki do
|
|
4
|
-
|
|
5
|
-
subject { described_class.
|
|
4
|
+
shared_examples 'regex matching behavior' do |mode|
|
|
5
|
+
subject { described_class.match?(pattern, value, engine: mode) }
|
|
6
6
|
|
|
7
7
|
context 'with basic concatenation' do
|
|
8
8
|
let(:pattern) { 'abc' }
|
|
9
9
|
|
|
10
10
|
context 'when text is "abc"' do
|
|
11
11
|
let(:value) { 'abc' }
|
|
12
|
+
|
|
12
13
|
it { is_expected.to be true }
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
context 'when text is "ab"' do
|
|
16
17
|
let(:value) { 'ab' }
|
|
18
|
+
|
|
17
19
|
it { is_expected.to be false }
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
context 'when text is "abcd"' do
|
|
21
23
|
let(:value) { 'abcd' }
|
|
24
|
+
|
|
22
25
|
it { is_expected.to be false }
|
|
23
26
|
end
|
|
24
27
|
end
|
|
@@ -28,16 +31,53 @@ RSpec.describe Hoozuki do
|
|
|
28
31
|
|
|
29
32
|
context 'when text is "a"' do
|
|
30
33
|
let(:value) { 'a' }
|
|
34
|
+
|
|
31
35
|
it { is_expected.to be true }
|
|
32
36
|
end
|
|
33
37
|
|
|
34
38
|
context 'when text is "b"' do
|
|
35
39
|
let(:value) { 'b' }
|
|
40
|
+
|
|
36
41
|
it { is_expected.to be true }
|
|
37
42
|
end
|
|
38
43
|
|
|
39
44
|
context 'when text is "ab"' do
|
|
40
45
|
let(:value) { 'ab' }
|
|
46
|
+
|
|
47
|
+
it { is_expected.to be false }
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
context 'with multiple alternations' do
|
|
52
|
+
let(:pattern) { 'a|b|c|d' }
|
|
53
|
+
|
|
54
|
+
context 'when text is "a"' do
|
|
55
|
+
let(:value) { 'a' }
|
|
56
|
+
|
|
57
|
+
it { is_expected.to be true }
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
context 'when text is "b"' do
|
|
61
|
+
let(:value) { 'b' }
|
|
62
|
+
|
|
63
|
+
it { is_expected.to be true }
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
context 'when text is "c"' do
|
|
67
|
+
let(:value) { 'c' }
|
|
68
|
+
|
|
69
|
+
it { is_expected.to be true }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
context 'when text is "d"' do
|
|
73
|
+
let(:value) { 'd' }
|
|
74
|
+
|
|
75
|
+
it { is_expected.to be true }
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
context 'when text is "e"' do
|
|
79
|
+
let(:value) { 'e' }
|
|
80
|
+
|
|
41
81
|
it { is_expected.to be false }
|
|
42
82
|
end
|
|
43
83
|
end
|
|
@@ -48,21 +88,25 @@ RSpec.describe Hoozuki do
|
|
|
48
88
|
|
|
49
89
|
context 'when text is "" (empty)' do
|
|
50
90
|
let(:value) { '' }
|
|
91
|
+
|
|
51
92
|
it { is_expected.to be true }
|
|
52
93
|
end
|
|
53
94
|
|
|
54
95
|
context 'when text is "b"' do
|
|
55
96
|
let(:value) { 'b' }
|
|
97
|
+
|
|
56
98
|
it { is_expected.to be true }
|
|
57
99
|
end
|
|
58
100
|
|
|
59
101
|
context 'when text is "bbb"' do
|
|
60
102
|
let(:value) { 'bbb' }
|
|
103
|
+
|
|
61
104
|
it { is_expected.to be true }
|
|
62
105
|
end
|
|
63
106
|
|
|
64
107
|
context 'when text is "c"' do
|
|
65
108
|
let(:value) { 'c' }
|
|
109
|
+
|
|
66
110
|
it { is_expected.to be false }
|
|
67
111
|
end
|
|
68
112
|
end
|
|
@@ -72,16 +116,19 @@ RSpec.describe Hoozuki do
|
|
|
72
116
|
|
|
73
117
|
context 'when text is "a"' do
|
|
74
118
|
let(:value) { 'a' }
|
|
119
|
+
|
|
75
120
|
it { is_expected.to be true }
|
|
76
121
|
end
|
|
77
122
|
|
|
78
123
|
context 'when text is "aaa"' do
|
|
79
124
|
let(:value) { 'aaa' }
|
|
125
|
+
|
|
80
126
|
it { is_expected.to be true }
|
|
81
127
|
end
|
|
82
128
|
|
|
83
129
|
context 'when text is "" (empty)' do
|
|
84
130
|
let(:value) { '' }
|
|
131
|
+
|
|
85
132
|
it { is_expected.to be false }
|
|
86
133
|
end
|
|
87
134
|
end
|
|
@@ -91,16 +138,19 @@ RSpec.describe Hoozuki do
|
|
|
91
138
|
|
|
92
139
|
context 'when text is "" (empty)' do
|
|
93
140
|
let(:value) { '' }
|
|
141
|
+
|
|
94
142
|
it { is_expected.to be true }
|
|
95
143
|
end
|
|
96
144
|
|
|
97
145
|
context 'when text is "c"' do
|
|
98
146
|
let(:value) { 'c' }
|
|
147
|
+
|
|
99
148
|
it { is_expected.to be true }
|
|
100
149
|
end
|
|
101
150
|
|
|
102
151
|
context 'when text is "cc"' do
|
|
103
152
|
let(:value) { 'cc' }
|
|
153
|
+
|
|
104
154
|
it { is_expected.to be false }
|
|
105
155
|
end
|
|
106
156
|
end
|
|
@@ -111,16 +161,19 @@ RSpec.describe Hoozuki do
|
|
|
111
161
|
|
|
112
162
|
context 'when text is "abcd"' do
|
|
113
163
|
let(:value) { 'abcd' }
|
|
164
|
+
|
|
114
165
|
it { is_expected.to be true }
|
|
115
166
|
end
|
|
116
167
|
|
|
117
168
|
context 'when text is "ab"' do
|
|
118
169
|
let(:value) { 'ab' }
|
|
170
|
+
|
|
119
171
|
it { is_expected.to be true }
|
|
120
172
|
end
|
|
121
173
|
|
|
122
174
|
context 'when text is "abc"' do
|
|
123
175
|
let(:value) { 'abc' }
|
|
176
|
+
|
|
124
177
|
it { is_expected.to be false }
|
|
125
178
|
end
|
|
126
179
|
end
|
|
@@ -131,11 +184,13 @@ RSpec.describe Hoozuki do
|
|
|
131
184
|
|
|
132
185
|
context 'when text is "a|b*"' do
|
|
133
186
|
let(:value) { 'a|b*' }
|
|
187
|
+
|
|
134
188
|
it { is_expected.to be true }
|
|
135
189
|
end
|
|
136
190
|
|
|
137
191
|
context 'when text is "ab"' do
|
|
138
192
|
let(:value) { 'ab' }
|
|
193
|
+
|
|
139
194
|
it { is_expected.to be false }
|
|
140
195
|
end
|
|
141
196
|
end
|
|
@@ -145,11 +200,13 @@ RSpec.describe Hoozuki do
|
|
|
145
200
|
|
|
146
201
|
context 'when text is "(a+)"' do
|
|
147
202
|
let(:value) { '(a+)' }
|
|
203
|
+
|
|
148
204
|
it { is_expected.to be true }
|
|
149
205
|
end
|
|
150
206
|
|
|
151
207
|
context 'when text is "a"' do
|
|
152
208
|
let(:value) { 'a' }
|
|
209
|
+
|
|
153
210
|
it { is_expected.to be false }
|
|
154
211
|
end
|
|
155
212
|
end
|
|
@@ -160,26 +217,31 @@ RSpec.describe Hoozuki do
|
|
|
160
217
|
|
|
161
218
|
context 'when text is "a"' do
|
|
162
219
|
let(:value) { 'a' }
|
|
220
|
+
|
|
163
221
|
it { is_expected.to be true }
|
|
164
222
|
end
|
|
165
223
|
|
|
166
224
|
context 'when text is "bc"' do
|
|
167
225
|
let(:value) { 'bc' }
|
|
226
|
+
|
|
168
227
|
it { is_expected.to be true }
|
|
169
228
|
end
|
|
170
229
|
|
|
171
230
|
context 'when text is "cde"' do
|
|
172
231
|
let(:value) { 'cde' }
|
|
232
|
+
|
|
173
233
|
it { is_expected.to be true }
|
|
174
234
|
end
|
|
175
235
|
|
|
176
236
|
context 'when text is "bbbcde"' do
|
|
177
237
|
let(:value) { 'bbbcde' }
|
|
238
|
+
|
|
178
239
|
it { is_expected.to be true }
|
|
179
240
|
end
|
|
180
241
|
|
|
181
242
|
context 'when text is "bd"' do
|
|
182
243
|
let(:value) { 'bd' }
|
|
244
|
+
|
|
183
245
|
it { is_expected.to be false }
|
|
184
246
|
end
|
|
185
247
|
end
|
|
@@ -189,23 +251,145 @@ RSpec.describe Hoozuki do
|
|
|
189
251
|
|
|
190
252
|
context 'when text is "こんやっぴー"' do
|
|
191
253
|
let(:value) { 'こんやっぴー' }
|
|
254
|
+
|
|
192
255
|
it { is_expected.to be true }
|
|
193
256
|
end
|
|
194
257
|
|
|
195
258
|
context 'when text is "おつやっぴー"' do
|
|
196
259
|
let(:value) { 'おつやっぴー' }
|
|
260
|
+
|
|
197
261
|
it { is_expected.to be true }
|
|
198
262
|
end
|
|
199
263
|
|
|
200
264
|
context 'when text is "こんおつやっぴー"' do
|
|
201
265
|
let(:value) { 'こんおつやっぴー' }
|
|
266
|
+
|
|
202
267
|
it { is_expected.to be true }
|
|
203
268
|
end
|
|
204
269
|
|
|
205
270
|
context 'when text is "こんこんきーつね"' do
|
|
206
271
|
let(:value) { 'こんこんきーつね' }
|
|
272
|
+
|
|
207
273
|
it { is_expected.to be false }
|
|
208
274
|
end
|
|
209
275
|
end
|
|
210
276
|
end
|
|
277
|
+
|
|
278
|
+
describe '#match?' do
|
|
279
|
+
context 'with :vm mode' do
|
|
280
|
+
it_behaves_like 'regex matching behavior', :vm
|
|
281
|
+
|
|
282
|
+
context 'with VM-specific edge cases' do
|
|
283
|
+
it 'handles deeply nested structures' do
|
|
284
|
+
pattern = '((((a))))'
|
|
285
|
+
expect(described_class.match?(pattern, 'a', engine: :vm)).to be true
|
|
286
|
+
expect(described_class.match?(pattern, 'b', engine: :vm)).to be false
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
it 'handles multiple quantifiers in sequence' do
|
|
290
|
+
pattern = 'a*b+c?'
|
|
291
|
+
expect(described_class.match?(pattern, 'bc', engine: :vm)).to be true
|
|
292
|
+
expect(described_class.match?(pattern, 'abc', engine: :vm)).to be true
|
|
293
|
+
expect(described_class.match?(pattern, 'aaabbbbc', engine: :vm)).to be true
|
|
294
|
+
expect(described_class.match?(pattern, 'c', engine: :vm)).to be false
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
it 'handles empty alternations' do
|
|
298
|
+
pattern = 'a|'
|
|
299
|
+
expect(described_class.match?(pattern, 'a', engine: :vm)).to be true
|
|
300
|
+
expect(described_class.match?(pattern, '', engine: :vm)).to be true
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
it 'handles nested groups with quantifiers' do
|
|
304
|
+
pattern = '(ab)*'
|
|
305
|
+
expect(described_class.match?(pattern, '', engine: :vm)).to be true
|
|
306
|
+
expect(described_class.match?(pattern, 'ab', engine: :vm)).to be true
|
|
307
|
+
expect(described_class.match?(pattern, 'abab', engine: :vm)).to be true
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
context 'with :dfa mode' do
|
|
313
|
+
it_behaves_like 'regex matching behavior', :dfa
|
|
314
|
+
|
|
315
|
+
context 'with DFA-specific edge cases' do
|
|
316
|
+
it 'handles deeply nested structures' do
|
|
317
|
+
pattern = '((((a))))'
|
|
318
|
+
expect(described_class.match?(pattern, 'a', engine: :dfa)).to be true
|
|
319
|
+
expect(described_class.match?(pattern, 'b', engine: :dfa)).to be false
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
it 'handles multiple quantifiers in sequence' do
|
|
323
|
+
pattern = 'a*b+c?'
|
|
324
|
+
expect(described_class.match?(pattern, 'bc', engine: :dfa)).to be true
|
|
325
|
+
expect(described_class.match?(pattern, 'abc', engine: :dfa)).to be true
|
|
326
|
+
expect(described_class.match?(pattern, 'aaabbbbc', engine: :dfa)).to be true
|
|
327
|
+
expect(described_class.match?(pattern, 'c', engine: :dfa)).to be false
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
it 'handles empty alternations' do
|
|
331
|
+
pattern = 'a|'
|
|
332
|
+
expect(described_class.match?(pattern, 'a', engine: :dfa)).to be true
|
|
333
|
+
expect(described_class.match?(pattern, '', engine: :dfa)).to be true
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
it 'handles nested groups with quantifiers' do
|
|
337
|
+
pattern = '(ab)*'
|
|
338
|
+
expect(described_class.match?(pattern, '', engine: :dfa)).to be true
|
|
339
|
+
expect(described_class.match?(pattern, 'ab', engine: :dfa)).to be true
|
|
340
|
+
expect(described_class.match?(pattern, 'abab', engine: :dfa)).to be true
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
context 'with common edge cases' do
|
|
346
|
+
it 'handles single character patterns for both engines' do
|
|
347
|
+
[:vm, :dfa].each do |engine|
|
|
348
|
+
expect(described_class.match?('x', 'x', engine: engine)).to be true
|
|
349
|
+
expect(described_class.match?('x', 'y', engine: engine)).to be false
|
|
350
|
+
end
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
it 'handles very long patterns for both engines' do
|
|
354
|
+
pattern = 'a' * 100
|
|
355
|
+
input = 'a' * 100
|
|
356
|
+
|
|
357
|
+
[:vm, :dfa].each do |engine|
|
|
358
|
+
expect(described_class.match?(pattern, input, engine: engine)).to be true
|
|
359
|
+
end
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
it 'handles patterns with all quantifier types for both engines' do
|
|
363
|
+
pattern = 'a*b+c?d'
|
|
364
|
+
|
|
365
|
+
[:vm, :dfa].each do |engine|
|
|
366
|
+
expect(described_class.match?(pattern, 'bd', engine: engine)).to be true
|
|
367
|
+
expect(described_class.match?(pattern, 'bcd', engine: engine)).to be true
|
|
368
|
+
expect(described_class.match?(pattern, 'abcd', engine: engine)).to be true
|
|
369
|
+
expect(described_class.match?(pattern, 'aaabbbbcd', engine: engine)).to be true
|
|
370
|
+
end
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
describe '#compile' do
|
|
376
|
+
context 'with :vm engine' do
|
|
377
|
+
it 'returns instruction array' do
|
|
378
|
+
result = described_class.compile('a', engine: :vm)
|
|
379
|
+
expect(result).to be_an(Array)
|
|
380
|
+
expect(result).to all(be_a(Hoozuki::Instruction::Char).or(be_a(Hoozuki::Instruction::Match)))
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
it 'raises error for unknown engine' do
|
|
384
|
+
expect { described_class.compile('a', engine: :unknown) }.to raise_error(ArgumentError, 'Unknown engine: unknown')
|
|
385
|
+
end
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
context 'with :dfa engine' do
|
|
389
|
+
it 'returns DFA object' do
|
|
390
|
+
result = described_class.compile('a', engine: :dfa)
|
|
391
|
+
expect(result).to be_a(Hoozuki::Automaton::DFA)
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
end
|
|
211
395
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: hoozuki
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yudai Takada
|
|
@@ -26,8 +26,14 @@ files:
|
|
|
26
26
|
- lib/hoozuki.rb
|
|
27
27
|
- lib/hoozuki/automaton.rb
|
|
28
28
|
- lib/hoozuki/automaton/dfa.rb
|
|
29
|
+
- lib/hoozuki/automaton/dfa/builder.rb
|
|
29
30
|
- lib/hoozuki/automaton/nfa.rb
|
|
30
31
|
- lib/hoozuki/automaton/state_id.rb
|
|
32
|
+
- lib/hoozuki/instruction.rb
|
|
33
|
+
- lib/hoozuki/instruction/char.rb
|
|
34
|
+
- lib/hoozuki/instruction/jmp.rb
|
|
35
|
+
- lib/hoozuki/instruction/match.rb
|
|
36
|
+
- lib/hoozuki/instruction/split.rb
|
|
31
37
|
- lib/hoozuki/node.rb
|
|
32
38
|
- lib/hoozuki/node/choice.rb
|
|
33
39
|
- lib/hoozuki/node/concatenation.rb
|
|
@@ -35,7 +41,19 @@ files:
|
|
|
35
41
|
- lib/hoozuki/node/literal.rb
|
|
36
42
|
- lib/hoozuki/node/repetition.rb
|
|
37
43
|
- lib/hoozuki/parser.rb
|
|
44
|
+
- lib/hoozuki/parser.y
|
|
38
45
|
- lib/hoozuki/version.rb
|
|
46
|
+
- lib/hoozuki/vm.rb
|
|
47
|
+
- lib/hoozuki/vm/compiler.rb
|
|
48
|
+
- lib/hoozuki/vm/evaluator.rb
|
|
49
|
+
- spec/hoozuki/automaton/dfa/builder_spec.rb
|
|
50
|
+
- spec/hoozuki/automaton/dfa_spec.rb
|
|
51
|
+
- spec/hoozuki/automaton/nfa_spec.rb
|
|
52
|
+
- spec/hoozuki/instruction_spec.rb
|
|
53
|
+
- spec/hoozuki/node_spec.rb
|
|
54
|
+
- spec/hoozuki/parser_spec.rb
|
|
55
|
+
- spec/hoozuki/vm/compiler_spec.rb
|
|
56
|
+
- spec/hoozuki/vm/evaluator_spec.rb
|
|
39
57
|
- spec/hoozuki_spec.rb
|
|
40
58
|
- spec/spec_helper.rb
|
|
41
59
|
homepage: https://github.com/ydah/hoozuki
|
|
@@ -60,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
60
78
|
- !ruby/object:Gem::Version
|
|
61
79
|
version: '0'
|
|
62
80
|
requirements: []
|
|
63
|
-
rubygems_version: 3.
|
|
81
|
+
rubygems_version: 3.6.9
|
|
64
82
|
specification_version: 4
|
|
65
83
|
summary: A hobby regex engine written in Ruby.
|
|
66
84
|
test_files: []
|