srl_ruby 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +3 -0
- data/.yardopts +6 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +16 -0
- data/bin/srl_ruby +58 -0
- data/lib/regex/abstract_method.rb +35 -0
- data/lib/regex/alternation.rb +27 -0
- data/lib/regex/anchor.rb +45 -0
- data/lib/regex/atomic_expression.rb +16 -0
- data/lib/regex/capturing_group.rb +51 -0
- data/lib/regex/char_class.rb +38 -0
- data/lib/regex/char_range.rb +51 -0
- data/lib/regex/char_shorthand.rb +50 -0
- data/lib/regex/character.rb +204 -0
- data/lib/regex/compound_expression.rb +57 -0
- data/lib/regex/concatenation.rb +29 -0
- data/lib/regex/expression.rb +60 -0
- data/lib/regex/lookaround.rb +50 -0
- data/lib/regex/match_option.rb +34 -0
- data/lib/regex/monadic_expression.rb +28 -0
- data/lib/regex/multiplicity.rb +91 -0
- data/lib/regex/non_capturing_group.rb +27 -0
- data/lib/regex/polyadic_expression.rb +60 -0
- data/lib/regex/quantifiable.rb +22 -0
- data/lib/regex/repetition.rb +29 -0
- data/lib/regex/wildcard.rb +23 -0
- data/lib/srl_ruby/ast_builder.rb +384 -0
- data/lib/srl_ruby/grammar.rb +106 -0
- data/lib/srl_ruby/regex_repr.rb +13 -0
- data/lib/srl_ruby/tokenizer.rb +147 -0
- data/lib/srl_ruby/version.rb +3 -0
- data/lib/srl_ruby.rb +4 -0
- data/spec/integration_spec.rb +451 -0
- data/spec/regex/character_spec.rb +166 -0
- data/spec/regex/multiplicity_spec.rb +79 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/srl_ruby/srl_ruby_spec.rb +7 -0
- data/spec/srl_ruby/tokenizer_spec.rb +147 -0
- data/srl_ruby.gemspec +58 -0
- metadata +150 -0
@@ -0,0 +1,451 @@
|
|
1
|
+
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
+
require_relative '../lib/srl_ruby/tokenizer'
|
3
|
+
require_relative '../lib/srl_ruby/grammar'
|
4
|
+
require_relative '../lib/srl_ruby/ast_builder'
|
5
|
+
|
6
|
+
module SrlRuby
|
7
|
+
describe 'Integration tests:' do
|
8
|
+
# Tokenize the given SRL text and hand the tokens over to the engine
# held in @engine.
# @param someSRL the SRL source text to parse
# @return whatever @engine.parse returns for the token sequence
def parse(someSRL)
  srl_tokens = SrlRuby::Tokenizer.new(someSRL).tokens
  @engine.parse(srl_tokens)
end
|
12
|
+
|
13
|
+
# Convert a parse result into a tree (via @engine.convert) and
# return the root node of that tree.
# @param aResult the object returned by #parse
# @return the root of the converted tree
def regexp_repr(aResult)
  @engine.convert(aResult).root
end
|
18
|
+
|
19
|
+
# Before every example: create a Rley engine whose representation
# builder is ASTBuilder, then load the SRL grammar into it.
before(:each) do
  @engine = Rley::Engine.new { |settings| settings.repr_builder = ASTBuilder }
  @engine.use_grammar(SrlRuby::Grammar)
end
|
25
|
+
|
26
|
+
context 'Parsing character ranges:' do
  # Each row: [syntax quoted in the example title, SRL source, expected regexp]
  [
    ['letter from ... to ...', 'letter from a to f', '[a-f]'],
    ['uppercase letter from ... to ...', 'UPPERCASE letter from A to F', '[A-F]'],
    ['letter', 'letter', '[a-z]'],
    ['uppercase letter', 'uppercase letter', '[A-Z]'],
    ['digit from ... to ...', 'digit from 1 to 4', '[1-4]']
  ].each do |syntax, srl_source, expected|
    it "should parse '#{syntax}' syntax" do
      outcome = parse(srl_source)
      expect(outcome).to be_success

      expect(regexp_repr(outcome).to_str).to eq(expected)
    end
  end
end # context
|
67
|
+
|
68
|
+
context 'Parsing string literals:' do
  # Example title => [SRL source, expected regexp]
  {
    'should parse double quotes literal string' => ['literally "hello"', 'hello'],
    'should parse single quotes literal string' => ["literally 'hello'", 'hello'],
    'should escape special characters' => ["literally '.'", '\.']
  }.each do |description, (srl_source, expected)|
    it description do
      outcome = parse(srl_source)
      expect(outcome).to be_success

      expect(regexp_repr(outcome).to_str).to eq(expected)
    end
  end
end # context
|
93
|
+
|
94
|
+
context 'Parsing character classes:' do
  # Each row: [syntax quoted in the example title, SRL source, expected regexp]
  [
    ['digit', 'digit', '\d'],
    ['number', 'number', '\d'],
    ['any character', 'any character', '\w'],
    ['no character', 'no character', '\W'],
    ['whitespace', 'whitespace', '\s'],
    ['no whitespace', 'no whitespace', '\S'],
    ['anything', 'anything', '.'],
    # Remark: reference implementation less readable
    # (escapes more characters than required)
    ['one of', 'one of "._%+-"', '[._%+\-]']
  ].each do |syntax, srl_source, expected|
    it "should parse '#{syntax}' syntax" do
      outcome = parse(srl_source)
      expect(outcome).to be_success

      expect(regexp_repr(outcome).to_str).to eq(expected)
    end
  end
end # context
|
161
|
+
|
162
|
+
context 'Parsing special character declarations:' do
  # SRL keyword(s) => expected regexp text (single-quoted, so the
  # backslashes below are literal characters of the expectation)
  {
    'tab' => '\t',
    'backslash' => '\\',
    'new line' => '\n'
  }.each do |srl_source, expected|
    it "should parse '#{srl_source}' syntax" do
      outcome = parse(srl_source)
      expect(outcome).to be_success

      expect(regexp_repr(outcome).to_str).to eq(expected)
    end
  end
end # context
|
187
|
+
|
188
|
+
context 'Parsing alternations:' do
  # 'any of (...)' is expected to come out as a non-capturing group
  # whose alternatives are separated by '|'.
  it "should parse 'any of' syntax" do
    outcome = parse('any of (any character, one of "._%-+")')
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('(?:\w|[._%\-+])')
  end
end # context
|
198
|
+
|
199
|
+
context 'Parsing concatenation:' do
|
200
|
+
it 'should reject dangling comma' do
|
201
|
+
source = 'literally "a",'
|
202
|
+
result = parse(source)
|
203
|
+
expect(result).not_to be_success
|
204
|
+
message_prefix = /Premature end of input after ','/
|
205
|
+
expect(result.failure_reason.message).to match(message_prefix)
|
206
|
+
end
|
207
|
+
|
208
|
+
it 'should parse concatenation' do
|
209
|
+
result = parse('any of (literally "sample", (digit once or more))')
|
210
|
+
expect(result).to be_success
|
211
|
+
|
212
|
+
regexp = regexp_repr(result)
|
213
|
+
expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'should parse a long sequence of patterns' do
|
217
|
+
source = <<-ENDS
|
218
|
+
any of (any character, one of "._%-+") once or more,
|
219
|
+
literally "@",
|
220
|
+
any of (digit, letter, one of ".-") once or more,
|
221
|
+
literally ".",
|
222
|
+
letter at least 2 times
|
223
|
+
ENDS
|
224
|
+
|
225
|
+
result = parse(source)
|
226
|
+
expect(result).to be_success
|
227
|
+
|
228
|
+
regexp = regexp_repr(result)
|
229
|
+
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
230
|
+
expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
|
231
|
+
expect(regexp.to_str).to eq(expectation)
|
232
|
+
end
|
233
|
+
end # context
|
234
|
+
|
235
|
+
context 'Parsing quantifiers:' do
  # Character range put in front of most quantifiers below
  # (the trailing space separates it from the quantifier keyword)
  let(:prefix) { 'letter from p to t ' }

  it "should parse 'once' syntax" do
    outcome = parse("#{prefix}once")
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[p-t]{1}')
  end

  it "should parse 'twice' syntax" do
    outcome = parse('digit twice')
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('\d{2}')
  end

  it "should parse 'optional' syntax" do
    outcome = parse('anything optional')
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('.?')
  end

  it "should parse 'exactly ... times' syntax" do
    outcome = parse('letter from a to f exactly 4 times')
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[a-f]{4}')
  end

  it "should parse 'between ... and ... times' syntax" do
    outcome = parse("#{prefix}between 2 and 4 times")
    expect(outcome).to be_success

    # Dropping 'times' keyword is shorter syntax
    expect(parse("#{prefix}between 2 and 4")).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[p-t]{2,4}')
  end

  it "should parse 'once or more' syntax" do
    outcome = parse("#{prefix}once or more")
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[p-t]+')
  end

  it "should parse 'never or more' syntax" do
    outcome = parse("#{prefix}never or more")
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[p-t]*')
  end

  it "should parse 'at least ... times' syntax" do
    outcome = parse("#{prefix}at least 10 times")
    expect(outcome).to be_success

    expect(regexp_repr(outcome).to_str).to eq('[p-t]{10,}')
  end
end # context
|
305
|
+
|
306
|
+
context 'Parsing lookaround:' do
  # Each row: [kind of lookaround, SRL source, expected regexp]
  # NOTE(review): the lookbehind expectations place the assertion after
  # the literal ('bar(?<=foo)'); confirm this is the intended output.
  [
    ['positive lookahead',
     'letter if followed by (anything once or more, digit)',
     '[a-z](?=(?:.+\d))'],
    ['negative lookahead',
     'letter if not followed by (anything once or more, digit)',
     '[a-z](?!(?:.+\d))'],
    ['positive lookbehind',
     'literally "bar" if already had literally "foo"',
     'bar(?<=foo)'],
    ['negative lookbehind',
     'literally "bar" if not already had literally "foo"',
     'bar(?<!foo)']
  ].each do |kind, srl_source, expected|
    it "should parse #{kind}" do
      outcome = parse(srl_source)
      expect(outcome).to be_success

      expect(regexp_repr(outcome).to_str).to eq(expected)
    end
  end
end # context
|
339
|
+
|
340
|
+
context 'Parsing capturing group:' do
|
341
|
+
it 'should parse simple anonymous capturing group' do
|
342
|
+
result = parse('capture(literally "sample")')
|
343
|
+
expect(result).to be_success
|
344
|
+
|
345
|
+
regexp = regexp_repr(result)
|
346
|
+
expect(regexp.to_str).to eq('(sample)')
|
347
|
+
end
|
348
|
+
|
349
|
+
it 'should parse complex anonymous capturing group' do
|
350
|
+
source = 'capture(any of (literally "sample", (digit once or more)))'
|
351
|
+
result = parse(source)
|
352
|
+
expect(result).to be_success
|
353
|
+
|
354
|
+
regexp = regexp_repr(result)
|
355
|
+
expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
|
356
|
+
end
|
357
|
+
|
358
|
+
it 'should parse simple anonymous until capturing group' do
|
359
|
+
result = parse('capture anything once or more until literally "!"')
|
360
|
+
expect(result).to be_success
|
361
|
+
|
362
|
+
regexp = regexp_repr(result)
|
363
|
+
expect(regexp.to_str).to eq('(.+)!')
|
364
|
+
end
|
365
|
+
|
366
|
+
it 'should parse complex named capturing group' do
|
367
|
+
source = <<-END_SRL
|
368
|
+
capture(any of (literally "sample", (digit once or more)))
|
369
|
+
as "foo"
|
370
|
+
END_SRL
|
371
|
+
result = parse(source)
|
372
|
+
expect(result).to be_success
|
373
|
+
|
374
|
+
regexp = regexp_repr(result)
|
375
|
+
expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
|
376
|
+
end
|
377
|
+
|
378
|
+
it 'should parse a sequence with named capturing groups' do
|
379
|
+
source = <<-ENDS
|
380
|
+
capture (anything once or more) as "first",
|
381
|
+
literally " - ",
|
382
|
+
capture literally "second part" as "second"
|
383
|
+
ENDS
|
384
|
+
result = parse(source)
|
385
|
+
expect(result).to be_success
|
386
|
+
|
387
|
+
regexp = regexp_repr(result)
|
388
|
+
expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
|
389
|
+
end
|
390
|
+
|
391
|
+
it 'should parse complex named until capturing group' do
|
392
|
+
source = 'capture (anything once or more) as "foo" until literally "m"'
|
393
|
+
result = parse(source)
|
394
|
+
expect(result).to be_success
|
395
|
+
|
396
|
+
regexp = regexp_repr(result)
|
397
|
+
expect(regexp.to_str).to eq('(?<foo>.+)m')
|
398
|
+
end
|
399
|
+
end # context
|
400
|
+
|
401
|
+
context 'Parsing anchors:' do
|
402
|
+
it 'should parse begin anchors' do
|
403
|
+
result = parse('starts with literally "match"')
|
404
|
+
expect(result).to be_success
|
405
|
+
|
406
|
+
regexp = regexp_repr(result)
|
407
|
+
expect(regexp.to_str).to eq('^match')
|
408
|
+
end
|
409
|
+
|
410
|
+
it 'should parse begin anchors (alternative syntax)' do
|
411
|
+
result = parse('begin with literally "match"')
|
412
|
+
expect(result).to be_success
|
413
|
+
|
414
|
+
regexp = regexp_repr(result)
|
415
|
+
expect(regexp.to_str).to eq('^match')
|
416
|
+
end
|
417
|
+
|
418
|
+
it 'should parse end anchors' do
|
419
|
+
result = parse('literally "match" must end')
|
420
|
+
expect(result).to be_success
|
421
|
+
|
422
|
+
regexp = regexp_repr(result)
|
423
|
+
expect(regexp.to_str).to eq('match$')
|
424
|
+
end
|
425
|
+
|
426
|
+
it 'should parse combination of begin and end anchors' do
|
427
|
+
result = parse('starts with literally "match" must end')
|
428
|
+
expect(result).to be_success
|
429
|
+
|
430
|
+
regexp = regexp_repr(result)
|
431
|
+
expect(regexp.to_str).to eq('^match$')
|
432
|
+
end
|
433
|
+
|
434
|
+
it 'should accept anchor with a sequence of patterns' do
|
435
|
+
source = <<-ENDS
|
436
|
+
begin with any of (digit, letter, one of ".-") once or more,
|
437
|
+
literally ".",
|
438
|
+
letter at least 2 times must end
|
439
|
+
ENDS
|
440
|
+
|
441
|
+
result = parse(source)
|
442
|
+
expect(result).to be_success
|
443
|
+
|
444
|
+
regexp = regexp_repr(result)
|
445
|
+
# SRL (presumably, TODO confirm): ^(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}$
|
446
|
+
expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
447
|
+
end
|
448
|
+
end # context
|
449
|
+
end # describe
|
450
|
+
end # module
|
451
|
+
# End of file
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# File: character_spec.rb
|
2
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
3
|
+
require_relative '../../lib/regex/character'
|
4
|
+
|
5
|
+
module Regex # Open this namespace, to get rid of scope qualifiers
|
6
|
+
describe Character do
|
7
|
+
# This constant holds an arbitrary selection of characters
|
8
|
+
SampleChars = [?a, ?\0, ?\u0107].freeze
|
9
|
+
|
10
|
+
# This constant holds the codepoints of the character selection
|
11
|
+
SampleInts = [0x61, 0, 0x0107].freeze
|
12
|
+
|
13
|
+
# This constant holds an arbitrary selection of two characters (digrams)
|
14
|
+
# escape sequences
|
15
|
+
SampleDigrams = %w[\n \e \0 \6 \k].freeze
|
16
|
+
|
17
|
+
# This constant holds an arbitrary selection of escaped octal
|
18
|
+
# or hexadecimal literals
|
19
|
+
SampleNumEscs = %w[\0 \07 \x07 \xa \x0F \u03a3 \u{a}].freeze
|
20
|
+
|
21
|
+
before(:all) do
|
22
|
+
# Ensure that the set of codepoints is mapping the set of chars...
|
23
|
+
expect(SampleChars.map(&:ord)).to eq(SampleInts)
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'Creation & initialization' do
|
27
|
+
# A Character must accept each sample Integer codepoint without raising.
it 'should be created with an integer value (codepoint) or...' do
  SampleInts.each do |aCodepoint|
    expect { Character.new(aCodepoint) }.not_to raise_error
  end
end
|
32
|
+
|
33
|
+
# A Character must accept each one-character sample String without raising.
it '...could be created with a single character String or...' do
  SampleChars.each { |a_char| expect { Character.new(a_char) }.not_to raise_error }
end
|
38
|
+
|
39
|
+
it '...could be created with an escape sequence' do
  # Case 1: escape sequence is a digram
  # Case 2: escape sequence is an escaped octal or hexadecimal literal
  [SampleDigrams, SampleNumEscs].each do |escape_samples|
    escape_samples.each do |an_escape|
      expect { Character.new(an_escape) }.not_to raise_error
    end
  end
end
|
50
|
+
end # context
|
51
|
+
|
52
|
+
context 'Provided services' do
|
53
|
+
it 'Should know its lexeme if created from a string' do
  # Lexeme is defined when the character was initialised from a text
  SampleChars.each do |source_text|
    expect(Character.new(source_text).lexeme).to eq(source_text)
  end
end
|
60
|
+
|
61
|
+
# No lexeme is expected when the character was built from a codepoint.
it 'Should not know its lexeme representation from a codepoint' do
  SampleInts.each do |a_codepoint|
    expect(Character.new(a_codepoint).lexeme).to be_nil
  end
end
|
67
|
+
|
68
|
+
it 'should know its String representation' do
  # Try for one character
  newOne = Character.new(?\u03a3)
  expect(newOne.char).to eq('Σ')
  expect(newOne.to_str).to eq("\u03A3")

  # Try with our chars sample
  # (the comparison result used to be discarded; it is now asserted)
  SampleChars.each do |aChar|
    expect(Character.new(aChar).to_str).to eq(aChar)
  end

  # Try with our codepoint sample
  mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
  expect(mapped_chars).to eq(SampleChars)

  # Try with our escape sequence samples: the expectation is the one
  # character Ruby yields when the escape is evaluated in a double-quoted
  # String literal.
  # NOTE(review): the original compared #to_str with that character and
  # threw the result away. #char is asserted instead, on the assumption
  # that #to_str returns the escape text as written (backslash included)
  # rather than the character it denotes -- confirm against
  # Regex::Character.
  (SampleDigrams + SampleNumEscs).each do |anEscSeq|
    expectation = String.class_eval(%Q|"#{anEscSeq}"|, __FILE__, __LINE__)
    expect(Character.new(anEscSeq).char).to eq(expectation)
  end
end
|
89
|
+
|
90
|
+
it 'should know its codepoint' do
  # Try for one character
  newOne = Character.new(?\u03a3)
  expect(newOne.codepoint).to eq(0x03a3)

  # Try with our chars sample
  allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
  expect(allCodepoints).to eq(SampleInts)

  # Try with our codepoint sample
  # (no assignment: the return value of #each was never used)
  SampleInts.each do |aCodepoint|
    expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint)
  end

  # Try with our escape sequence samples: the expectation is the codepoint
  # of the character Ruby yields when the escape is evaluated in a
  # double-quoted String literal.
  (SampleDigrams + SampleNumEscs).each do |anEscSeq|
    expectation = String.class_eval(%Q|"#{anEscSeq}".ord()|, __FILE__, __LINE__)
    expect(Character.new(anEscSeq).codepoint).to eq(expectation)
  end
end
|
112
|
+
|
113
|
+
it 'should know whether it is equal to another Object' do
  newOne = Character.new(?\u03a3)

  # Case 1: test equality with itself
  expect(newOne).to eq(newOne)

  # Case 2: test equality with another Character
  expect(newOne).to eq(Character.new(?\u03a3))
  expect(newOne).not_to eq(Character.new(?\u0333))

  # Case 3: test equality with an integer value
  # (equality based on codepoint value)
  expect(newOne).to eq(0x03a3)
  expect(newOne).not_to eq(0x0333)

  # Case 4: test equality with a single-character String
  expect(newOne).to eq(?\u03a3)
  expect(newOne).not_to eq(?\u0333)

  # Case 5: test fails with multiple character strings
  expect(newOne).not_to eq('03a3')

  # Case 6: equality testing with arbitrary object
  expect(newOne).not_to eq(nil)
  expect(newOne).not_to eq(Object.new)

  # In case 6, equality is based on to_s method.
  simulator = double('fake')
  expect(simulator).to receive(:to_s).and_return(?\u03a3)
  expect(newOne).to eq(simulator)

  # Create a module that re-defines the existing to_s method
  module Tweak_to_s
    def to_s() # Overwrite the existing to_s method
      return ?\u03a3
    end
  end # module
  weird = Object.new
  weird.extend(Tweak_to_s)
  expect(newOne).to eq(weird)
end
|
154
|
+
|
155
|
+
# #explain is expected to quote the character in a short English phrase.
it 'should know its readable description' do
  latin = Character.new('a')
  expect(latin.explain).to eq("the character 'a'")

  sigma = Character.new(?\u03a3)
  expect(sigma.explain).to eq("the character '\u03a3'")
end
|
162
|
+
end # context
|
163
|
+
end # describe
|
164
|
+
end # module
|
165
|
+
|
166
|
+
# End of file
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# File: multiplicity_spec.rb
|
2
|
+
|
3
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
4
|
+
require_relative '../../lib/regex/multiplicity'
|
5
|
+
|
6
|
+
module SRL
|
7
|
+
# Reopen the module, in order to get rid of fully qualified names
|
8
|
+
module Regex # This module is used as a namespace
|
9
|
+
describe Multiplicity do
|
10
|
+
context 'Creation & initialisation' do
|
11
|
+
it 'should be created with 3 arguments' do
  # Valid cases: initialized with two integer values and a policy symbol
  %i[greedy lazy possessive].each do |policy|
    expect { Multiplicity.new(0, 1, policy) }.not_to raise_error
  end

  # Invalid case: initialized with invalid policy value
  error_class = StandardError
  error_text = "Invalid repetition policy 'KO'."
  expect { Multiplicity.new(0, :more, 'KO') }.to raise_error(error_class, error_text)
end
|
22
|
+
end
|
23
|
+
|
24
|
+
context 'Provided services' do
|
25
|
+
it 'should know its text representation' do
  # Text expected after the quantifier for each repetition policy
  policy2text = { greedy: '', lazy: '?', possessive: '+' }

  # Case: zero or one
  policy2text.each do |aPolicy, suffix|
    expect(Multiplicity.new(0, 1, aPolicy).to_str).to eq("?#{suffix}")
  end

  # Case: zero or more
  policy2text.each do |aPolicy, suffix|
    expect(Multiplicity.new(0, :more, aPolicy).to_str).to eq("*#{suffix}")
  end

  # Case: one or more
  policy2text.each do |aPolicy, suffix|
    expect(Multiplicity.new(1, :more, aPolicy).to_str).to eq("+#{suffix}")
  end

  # Case: exactly m times
  policy2text.each do |aPolicy, suffix|
    [1, 2, 5, 100].each do |aCount|
      quantifier = Multiplicity.new(aCount, aCount, aPolicy)
      expect(quantifier.to_str).to eq("{#{aCount}}#{suffix}")
    end
  end

  # Case: m, n times
  policy2text.each do |aPolicy, suffix|
    [1, 2, 5, 100].each do |aCount|
      # Upper bound is picked at random, strictly above the lower bound
      upper = aCount + 1 + rand(20)
      quantifier = Multiplicity.new(aCount, upper, aPolicy)
      expect(quantifier.to_str).to eq("{#{aCount},#{upper}}#{suffix}")
    end
  end

  # Case: m or more
  policy2text.each do |aPolicy, suffix|
    [2, 3, 5, 100].each do |aCount|
      quantifier = Multiplicity.new(aCount, :more, aPolicy)
      expect(quantifier.to_str).to eq("{#{aCount},}#{suffix}")
    end
  end
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end # module
|
78
|
+
end # module
|
79
|
+
# End of file
|