iregexp 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/iregexp +1 -1
- data/iregexp.gemspec +2 -2
- data/lib/iregexp.rb +2 -2
- data/lib/parser/iregexpgrammar.rb +197 -144
- data/test-data/simple.irl +49 -0
- data/test-data/simple.out +114 -0
- metadata +4 -4
- data/bin/iregexp~ +0 -102
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b48ffdc37779e3d0a862960ac7fcfd685de4cf59ef24dbc7556557372c5058c5
|
4
|
+
data.tar.gz: 074b33ad95b11cef1dc154f90eeee325432172d1721da222b08ab178791ae504
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 423cf8699b215e8aa87d6480a15be10d4964e1b51ebe736e721369c0d5a7d499786af1ab600b42d478f1ece5363557cd94c9d7bdc631a28ae7d7d935631eb102
|
7
|
+
data.tar.gz: aa366d615c0e246d50653a41efa1f852caf596db29952c0864c5fa63a3a9f69e545fc3242ff712726555fff97c5cfaa7e46a52de0420dfb302365563e15066a2
|
data/bin/iregexp
CHANGED
@@ -15,7 +15,7 @@ FUNCSIG_CHARS = {"l" => :logical, "n" => :nodes, "v" => :value}
|
|
15
15
|
$options = OpenStruct.new
|
16
16
|
begin
|
17
17
|
op = OptionParser.new do |opts|
|
18
|
-
opts.banner = "Usage:
|
18
|
+
opts.banner = "Usage: iregexp [options] file | -e expr"
|
19
19
|
|
20
20
|
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
21
21
|
$options.verbose = v
|
data/iregexp.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "iregexp"
|
3
|
-
s.version = "0.0.
|
3
|
+
s.version = "0.0.3"
|
4
4
|
s.summary = "I-Regexp Tools"
|
5
5
|
s.description = %q{iregexp implements converters and miscellaneous tools for I-Regexp}
|
6
6
|
s.author = "Carsten Bormann"
|
7
7
|
s.email = "cabo@tzi.org"
|
8
8
|
s.license = "MIT"
|
9
9
|
s.homepage = "http://github.com/cabo/iregexp"
|
10
|
-
s.files = Dir['lib/**/*.rb'] + %w(iregexp.gemspec) + Dir['data/*'] + Dir['bin/**/*.rb']
|
10
|
+
s.files = Dir['lib/**/*.rb'] + %w(iregexp.gemspec) + Dir['data/*'] + Dir['test-data/*'] + Dir['bin/**/*.rb']
|
11
11
|
s.executables = Dir['bin/*'].map {|x| File.basename(x)}
|
12
12
|
s.required_ruby_version = '>= 3.0'
|
13
13
|
|
data/lib/iregexp.rb
CHANGED
@@ -16,7 +16,7 @@ class IREGEXP
|
|
16
16
|
reason << line
|
17
17
|
reason << "#{'~' * (parser.failure_column - 1)}^"
|
18
18
|
end
|
19
|
-
reason.join("\n").gsub(/[\u0000-\u0009\u000b
|
19
|
+
reason.join("\n").gsub(/[\u0000-\u0009\u000b-\u001f\u007f]/) {|c| "\\u%04x" % c.ord}
|
20
20
|
end
|
21
21
|
|
22
22
|
SAFE_FN = /\A[-._a-zA-Z0-9]+\z/
|
@@ -24,7 +24,7 @@ class IREGEXP
|
|
24
24
|
def self.from_iregexp(s)
|
25
25
|
ast = @@parser.parse s
|
26
26
|
if !ast
|
27
|
-
fail ParseError.new(self.reason(@@parser, s))
|
27
|
+
fail ParseError.new("\n" << self.reason(@@parser, s))
|
28
28
|
end
|
29
29
|
ret = IREGEXP.new(ast)
|
30
30
|
|
@@ -196,19 +196,6 @@ module IREGEXPGRAMMAR
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
-
module Quantifier1
|
200
|
-
def quantity
|
201
|
-
elements[1]
|
202
|
-
end
|
203
|
-
|
204
|
-
end
|
205
|
-
|
206
|
-
module Quantifier2
|
207
|
-
def ast
|
208
|
-
quantity.ast
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
199
|
def _nt_quantifier
|
213
200
|
start_index = index
|
214
201
|
if node_cache[:quantifier].has_key?(index)
|
@@ -222,11 +209,11 @@ module IREGEXPGRAMMAR
|
|
222
209
|
|
223
210
|
i0 = index
|
224
211
|
i1 = index
|
225
|
-
if
|
212
|
+
if (match_len = has_terminal?("*", false, index))
|
226
213
|
r2 = true
|
227
|
-
@index +=
|
214
|
+
@index += match_len
|
228
215
|
else
|
229
|
-
terminal_parse_failure('
|
216
|
+
terminal_parse_failure('"*"')
|
230
217
|
r2 = nil
|
231
218
|
end
|
232
219
|
if r2
|
@@ -235,11 +222,11 @@ module IREGEXPGRAMMAR
|
|
235
222
|
r1.extend(Quantifier0)
|
236
223
|
r1.extend(Quantifier0)
|
237
224
|
else
|
238
|
-
if (match_len = has_terminal?("
|
225
|
+
if (match_len = has_terminal?("+", false, index))
|
239
226
|
r3 = true
|
240
227
|
@index += match_len
|
241
228
|
else
|
242
|
-
terminal_parse_failure('"
|
229
|
+
terminal_parse_failure('"+"')
|
243
230
|
r3 = nil
|
244
231
|
end
|
245
232
|
if r3
|
@@ -248,48 +235,32 @@ module IREGEXPGRAMMAR
|
|
248
235
|
r1.extend(Quantifier0)
|
249
236
|
r1.extend(Quantifier0)
|
250
237
|
else
|
251
|
-
|
252
|
-
|
238
|
+
if (match_len = has_terminal?("?", false, index))
|
239
|
+
r4 = true
|
240
|
+
@index += match_len
|
241
|
+
else
|
242
|
+
terminal_parse_failure('"?"')
|
243
|
+
r4 = nil
|
244
|
+
end
|
245
|
+
if r4
|
246
|
+
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
247
|
+
r1 = r4
|
248
|
+
r1.extend(Quantifier0)
|
249
|
+
r1.extend(Quantifier0)
|
250
|
+
else
|
251
|
+
@index = i1
|
252
|
+
r1 = nil
|
253
|
+
end
|
253
254
|
end
|
254
255
|
end
|
255
256
|
if r1
|
256
257
|
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
257
258
|
r0 = r1
|
258
259
|
else
|
259
|
-
|
260
|
-
if (match_len = has_terminal?("{", false, index))
|
261
|
-
r5 = true
|
262
|
-
@index += match_len
|
263
|
-
else
|
264
|
-
terminal_parse_failure('"{"')
|
265
|
-
r5 = nil
|
266
|
-
end
|
267
|
-
s4 << r5
|
260
|
+
r5 = _nt_range_quantifier
|
268
261
|
if r5
|
269
|
-
|
270
|
-
|
271
|
-
if r6
|
272
|
-
if (match_len = has_terminal?("}", false, index))
|
273
|
-
r7 = true
|
274
|
-
@index += match_len
|
275
|
-
else
|
276
|
-
terminal_parse_failure('"}"')
|
277
|
-
r7 = nil
|
278
|
-
end
|
279
|
-
s4 << r7
|
280
|
-
end
|
281
|
-
end
|
282
|
-
if s4.last
|
283
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
284
|
-
r4.extend(Quantifier1)
|
285
|
-
r4.extend(Quantifier2)
|
286
|
-
else
|
287
|
-
@index = i4
|
288
|
-
r4 = nil
|
289
|
-
end
|
290
|
-
if r4
|
291
|
-
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
292
|
-
r0 = r4
|
262
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
263
|
+
r0 = r5
|
293
264
|
else
|
294
265
|
@index = i0
|
295
266
|
r0 = nil
|
@@ -301,20 +272,20 @@ module IREGEXPGRAMMAR
|
|
301
272
|
r0
|
302
273
|
end
|
303
274
|
|
304
|
-
module
|
275
|
+
module RangeQuantifier0
|
305
276
|
end
|
306
277
|
|
307
|
-
module
|
278
|
+
module RangeQuantifier1
|
308
279
|
def QuantExact
|
309
|
-
elements[
|
280
|
+
elements[1]
|
310
281
|
end
|
311
282
|
|
312
283
|
end
|
313
284
|
|
314
|
-
module
|
285
|
+
module RangeQuantifier2
|
315
286
|
def ast
|
316
|
-
l = elements[
|
317
|
-
r = if rp = elements[
|
287
|
+
l = elements[1].ast
|
288
|
+
r = if rp = elements[2].elements
|
318
289
|
if rp[1].text_value != ''
|
319
290
|
rp[1].ast
|
320
291
|
else
|
@@ -327,63 +298,83 @@ module IREGEXPGRAMMAR
|
|
327
298
|
end
|
328
299
|
end
|
329
300
|
|
330
|
-
def
|
301
|
+
def _nt_range_quantifier
|
331
302
|
start_index = index
|
332
|
-
if node_cache[:
|
333
|
-
cached = node_cache[:
|
303
|
+
if node_cache[:range_quantifier].has_key?(index)
|
304
|
+
cached = node_cache[:range_quantifier][index]
|
334
305
|
if cached
|
335
|
-
node_cache[:
|
306
|
+
node_cache[:range_quantifier][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
336
307
|
@index = cached.interval.end
|
337
308
|
end
|
338
309
|
return cached
|
339
310
|
end
|
340
311
|
|
341
312
|
i0, s0 = index, []
|
342
|
-
|
313
|
+
if (match_len = has_terminal?("{", false, index))
|
314
|
+
r1 = true
|
315
|
+
@index += match_len
|
316
|
+
else
|
317
|
+
terminal_parse_failure('"{"')
|
318
|
+
r1 = nil
|
319
|
+
end
|
343
320
|
s0 << r1
|
344
321
|
if r1
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
end
|
353
|
-
s3 << r4
|
354
|
-
if r4
|
355
|
-
r6 = _nt_QuantExact
|
356
|
-
if r6
|
357
|
-
r5 = r6
|
322
|
+
r2 = _nt_QuantExact
|
323
|
+
s0 << r2
|
324
|
+
if r2
|
325
|
+
i4, s4 = index, []
|
326
|
+
if (match_len = has_terminal?(",", false, index))
|
327
|
+
r5 = true
|
328
|
+
@index += match_len
|
358
329
|
else
|
359
|
-
|
330
|
+
terminal_parse_failure('","')
|
331
|
+
r5 = nil
|
332
|
+
end
|
333
|
+
s4 << r5
|
334
|
+
if r5
|
335
|
+
r7 = _nt_QuantExact
|
336
|
+
if r7
|
337
|
+
r6 = r7
|
338
|
+
else
|
339
|
+
r6 = instantiate_node(SyntaxNode,input, index...index)
|
340
|
+
end
|
341
|
+
s4 << r6
|
342
|
+
end
|
343
|
+
if s4.last
|
344
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
345
|
+
r4.extend(RangeQuantifier0)
|
346
|
+
else
|
347
|
+
@index = i4
|
348
|
+
r4 = nil
|
349
|
+
end
|
350
|
+
if r4
|
351
|
+
r3 = r4
|
352
|
+
else
|
353
|
+
r3 = instantiate_node(SyntaxNode,input, index...index)
|
354
|
+
end
|
355
|
+
s0 << r3
|
356
|
+
if r3
|
357
|
+
if (match_len = has_terminal?("}", false, index))
|
358
|
+
r8 = true
|
359
|
+
@index += match_len
|
360
|
+
else
|
361
|
+
terminal_parse_failure('"}"')
|
362
|
+
r8 = nil
|
363
|
+
end
|
364
|
+
s0 << r8
|
360
365
|
end
|
361
|
-
s3 << r5
|
362
|
-
end
|
363
|
-
if s3.last
|
364
|
-
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
365
|
-
r3.extend(Quantity0)
|
366
|
-
else
|
367
|
-
@index = i3
|
368
|
-
r3 = nil
|
369
|
-
end
|
370
|
-
if r3
|
371
|
-
r2 = r3
|
372
|
-
else
|
373
|
-
r2 = instantiate_node(SyntaxNode,input, index...index)
|
374
366
|
end
|
375
|
-
s0 << r2
|
376
367
|
end
|
377
368
|
if s0.last
|
378
369
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
379
|
-
r0.extend(
|
380
|
-
r0.extend(
|
370
|
+
r0.extend(RangeQuantifier1)
|
371
|
+
r0.extend(RangeQuantifier2)
|
381
372
|
else
|
382
373
|
@index = i0
|
383
374
|
r0 = nil
|
384
375
|
end
|
385
376
|
|
386
|
-
node_cache[:
|
377
|
+
node_cache[:range_quantifier][start_index] = r0
|
387
378
|
|
388
379
|
r0
|
389
380
|
end
|
@@ -547,11 +538,11 @@ module IREGEXPGRAMMAR
|
|
547
538
|
r0.extend(NormalChar0)
|
548
539
|
r0.extend(NormalChar0)
|
549
540
|
else
|
550
|
-
if
|
541
|
+
if (match_len = has_terminal?(",", false, index))
|
551
542
|
r2 = true
|
552
|
-
@index +=
|
543
|
+
@index += match_len
|
553
544
|
else
|
554
|
-
terminal_parse_failure('
|
545
|
+
terminal_parse_failure('","')
|
555
546
|
r2 = nil
|
556
547
|
end
|
557
548
|
if r2
|
@@ -560,11 +551,11 @@ module IREGEXPGRAMMAR
|
|
560
551
|
r0.extend(NormalChar0)
|
561
552
|
r0.extend(NormalChar0)
|
562
553
|
else
|
563
|
-
if
|
554
|
+
if (match_len = has_terminal?("-", false, index))
|
564
555
|
r3 = true
|
565
|
-
@index +=
|
556
|
+
@index += match_len
|
566
557
|
else
|
567
|
-
terminal_parse_failure('
|
558
|
+
terminal_parse_failure('"-"')
|
568
559
|
r3 = nil
|
569
560
|
end
|
570
561
|
if r3
|
@@ -573,11 +564,11 @@ module IREGEXPGRAMMAR
|
|
573
564
|
r0.extend(NormalChar0)
|
574
565
|
r0.extend(NormalChar0)
|
575
566
|
else
|
576
|
-
if has_terminal?(@regexps[gr = '\A[
|
567
|
+
if has_terminal?(@regexps[gr = '\A[/->]'] ||= Regexp.new(gr), :regexp, index)
|
577
568
|
r4 = true
|
578
569
|
@index += 1
|
579
570
|
else
|
580
|
-
terminal_parse_failure('[
|
571
|
+
terminal_parse_failure('[/->]')
|
581
572
|
r4 = nil
|
582
573
|
end
|
583
574
|
if r4
|
@@ -586,11 +577,11 @@ module IREGEXPGRAMMAR
|
|
586
577
|
r0.extend(NormalChar0)
|
587
578
|
r0.extend(NormalChar0)
|
588
579
|
else
|
589
|
-
if has_terminal?(@regexps[gr = '\A[
|
580
|
+
if has_terminal?(@regexps[gr = '\A[@-Z]'] ||= Regexp.new(gr), :regexp, index)
|
590
581
|
r5 = true
|
591
582
|
@index += 1
|
592
583
|
else
|
593
|
-
terminal_parse_failure('[
|
584
|
+
terminal_parse_failure('[@-Z]')
|
594
585
|
r5 = nil
|
595
586
|
end
|
596
587
|
if r5
|
@@ -599,11 +590,11 @@ module IREGEXPGRAMMAR
|
|
599
590
|
r0.extend(NormalChar0)
|
600
591
|
r0.extend(NormalChar0)
|
601
592
|
else
|
602
|
-
if has_terminal?(@regexps[gr = '\A[
|
593
|
+
if has_terminal?(@regexps[gr = '\A[\\^-z]'] ||= Regexp.new(gr), :regexp, index)
|
603
594
|
r6 = true
|
604
595
|
@index += 1
|
605
596
|
else
|
606
|
-
terminal_parse_failure('[
|
597
|
+
terminal_parse_failure('[\\^-z]')
|
607
598
|
r6 = nil
|
608
599
|
end
|
609
600
|
if r6
|
@@ -612,8 +603,22 @@ module IREGEXPGRAMMAR
|
|
612
603
|
r0.extend(NormalChar0)
|
613
604
|
r0.extend(NormalChar0)
|
614
605
|
else
|
615
|
-
@
|
616
|
-
|
606
|
+
if has_terminal?(@regexps[gr = '\A[~-]'] ||= Regexp.new(gr), :regexp, index)
|
607
|
+
r7 = true
|
608
|
+
@index += 1
|
609
|
+
else
|
610
|
+
terminal_parse_failure('[~-]')
|
611
|
+
r7 = nil
|
612
|
+
end
|
613
|
+
if r7
|
614
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
615
|
+
r0 = r7
|
616
|
+
r0.extend(NormalChar0)
|
617
|
+
r0.extend(NormalChar0)
|
618
|
+
else
|
619
|
+
@index = i0
|
620
|
+
r0 = nil
|
621
|
+
end
|
617
622
|
end
|
618
623
|
end
|
619
624
|
end
|
@@ -726,85 +731,97 @@ module IREGEXPGRAMMAR
|
|
726
731
|
r3 = SyntaxNode.new(input, (index-1)...index) if r3 == true
|
727
732
|
r2 = r3
|
728
733
|
else
|
729
|
-
if
|
734
|
+
if (match_len = has_terminal?("-", false, index))
|
730
735
|
r4 = true
|
731
|
-
@index +=
|
736
|
+
@index += match_len
|
732
737
|
else
|
733
|
-
terminal_parse_failure('
|
738
|
+
terminal_parse_failure('"-"')
|
734
739
|
r4 = nil
|
735
740
|
end
|
736
741
|
if r4
|
737
742
|
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
738
743
|
r2 = r4
|
739
744
|
else
|
740
|
-
if (match_len = has_terminal?("
|
745
|
+
if (match_len = has_terminal?(".", false, index))
|
741
746
|
r5 = true
|
742
747
|
@index += match_len
|
743
748
|
else
|
744
|
-
terminal_parse_failure('"
|
749
|
+
terminal_parse_failure('"."')
|
745
750
|
r5 = nil
|
746
751
|
end
|
747
752
|
if r5
|
748
753
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
749
754
|
r2 = r5
|
750
755
|
else
|
751
|
-
if
|
756
|
+
if (match_len = has_terminal?("?", false, index))
|
752
757
|
r6 = true
|
753
|
-
@index +=
|
758
|
+
@index += match_len
|
754
759
|
else
|
755
|
-
terminal_parse_failure('
|
760
|
+
terminal_parse_failure('"?"')
|
756
761
|
r6 = nil
|
757
762
|
end
|
758
763
|
if r6
|
759
764
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
760
765
|
r2 = r6
|
761
766
|
else
|
762
|
-
if (
|
767
|
+
if has_terminal?(@regexps[gr = '\A[\\[-\\^]'] ||= Regexp.new(gr), :regexp, index)
|
763
768
|
r7 = true
|
764
|
-
@index +=
|
769
|
+
@index += 1
|
765
770
|
else
|
766
|
-
terminal_parse_failure('
|
771
|
+
terminal_parse_failure('[\\[-\\^]')
|
767
772
|
r7 = nil
|
768
773
|
end
|
769
774
|
if r7
|
770
775
|
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
771
776
|
r2 = r7
|
772
777
|
else
|
773
|
-
if (match_len = has_terminal?("
|
778
|
+
if (match_len = has_terminal?("n", false, index))
|
774
779
|
r8 = true
|
775
780
|
@index += match_len
|
776
781
|
else
|
777
|
-
terminal_parse_failure('"
|
782
|
+
terminal_parse_failure('"n"')
|
778
783
|
r8 = nil
|
779
784
|
end
|
780
785
|
if r8
|
781
786
|
r8 = SyntaxNode.new(input, (index-1)...index) if r8 == true
|
782
787
|
r2 = r8
|
783
788
|
else
|
784
|
-
if (match_len = has_terminal?("
|
789
|
+
if (match_len = has_terminal?("r", false, index))
|
785
790
|
r9 = true
|
786
791
|
@index += match_len
|
787
792
|
else
|
788
|
-
terminal_parse_failure('"
|
793
|
+
terminal_parse_failure('"r"')
|
789
794
|
r9 = nil
|
790
795
|
end
|
791
796
|
if r9
|
792
797
|
r9 = SyntaxNode.new(input, (index-1)...index) if r9 == true
|
793
798
|
r2 = r9
|
794
799
|
else
|
795
|
-
if
|
800
|
+
if (match_len = has_terminal?("t", false, index))
|
796
801
|
r10 = true
|
797
|
-
@index +=
|
802
|
+
@index += match_len
|
798
803
|
else
|
799
|
-
terminal_parse_failure('
|
804
|
+
terminal_parse_failure('"t"')
|
800
805
|
r10 = nil
|
801
806
|
end
|
802
807
|
if r10
|
803
808
|
r10 = SyntaxNode.new(input, (index-1)...index) if r10 == true
|
804
809
|
r2 = r10
|
805
810
|
else
|
806
|
-
@
|
807
|
-
|
811
|
+
if has_terminal?(@regexps[gr = '\A[\\{-\\}]'] ||= Regexp.new(gr), :regexp, index)
|
812
|
+
r11 = true
|
813
|
+
@index += 1
|
814
|
+
else
|
815
|
+
terminal_parse_failure('[\\{-\\}]')
|
816
|
+
r11 = nil
|
817
|
+
end
|
818
|
+
if r11
|
819
|
+
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
820
|
+
r2 = r11
|
821
|
+
else
|
822
|
+
@index = i2
|
823
|
+
r2 = nil
|
824
|
+
end
|
808
825
|
end
|
809
826
|
end
|
810
827
|
end
|
@@ -1390,41 +1407,65 @@ module IREGEXPGRAMMAR
|
|
1390
1407
|
s0 << r1
|
1391
1408
|
if r1
|
1392
1409
|
i3 = index
|
1393
|
-
if
|
1410
|
+
if (match_len = has_terminal?("l", false, index))
|
1394
1411
|
r4 = true
|
1395
|
-
@index +=
|
1412
|
+
@index += match_len
|
1396
1413
|
else
|
1397
|
-
terminal_parse_failure('
|
1414
|
+
terminal_parse_failure('"l"')
|
1398
1415
|
r4 = nil
|
1399
1416
|
end
|
1400
1417
|
if r4
|
1401
1418
|
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
1402
1419
|
r3 = r4
|
1403
1420
|
else
|
1404
|
-
if (match_len = has_terminal?("
|
1421
|
+
if (match_len = has_terminal?("m", false, index))
|
1405
1422
|
r5 = true
|
1406
1423
|
@index += match_len
|
1407
1424
|
else
|
1408
|
-
terminal_parse_failure('"
|
1425
|
+
terminal_parse_failure('"m"')
|
1409
1426
|
r5 = nil
|
1410
1427
|
end
|
1411
1428
|
if r5
|
1412
1429
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
1413
1430
|
r3 = r5
|
1414
1431
|
else
|
1415
|
-
if
|
1432
|
+
if (match_len = has_terminal?("o", false, index))
|
1416
1433
|
r6 = true
|
1417
|
-
@index +=
|
1434
|
+
@index += match_len
|
1418
1435
|
else
|
1419
|
-
terminal_parse_failure('
|
1436
|
+
terminal_parse_failure('"o"')
|
1420
1437
|
r6 = nil
|
1421
1438
|
end
|
1422
1439
|
if r6
|
1423
1440
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
1424
1441
|
r3 = r6
|
1425
1442
|
else
|
1426
|
-
|
1427
|
-
|
1443
|
+
if (match_len = has_terminal?("t", false, index))
|
1444
|
+
r7 = true
|
1445
|
+
@index += match_len
|
1446
|
+
else
|
1447
|
+
terminal_parse_failure('"t"')
|
1448
|
+
r7 = nil
|
1449
|
+
end
|
1450
|
+
if r7
|
1451
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
1452
|
+
r3 = r7
|
1453
|
+
else
|
1454
|
+
if (match_len = has_terminal?("u", false, index))
|
1455
|
+
r8 = true
|
1456
|
+
@index += match_len
|
1457
|
+
else
|
1458
|
+
terminal_parse_failure('"u"')
|
1459
|
+
r8 = nil
|
1460
|
+
end
|
1461
|
+
if r8
|
1462
|
+
r8 = SyntaxNode.new(input, (index-1)...index) if r8 == true
|
1463
|
+
r3 = r8
|
1464
|
+
else
|
1465
|
+
@index = i3
|
1466
|
+
r3 = nil
|
1467
|
+
end
|
1468
|
+
end
|
1428
1469
|
end
|
1429
1470
|
end
|
1430
1471
|
end
|
@@ -1934,19 +1975,31 @@ module IREGEXPGRAMMAR
|
|
1934
1975
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
1935
1976
|
r3 = r5
|
1936
1977
|
else
|
1937
|
-
if
|
1978
|
+
if (match_len = has_terminal?("n", false, index))
|
1938
1979
|
r6 = true
|
1939
|
-
@index +=
|
1980
|
+
@index += match_len
|
1940
1981
|
else
|
1941
|
-
terminal_parse_failure('
|
1982
|
+
terminal_parse_failure('"n"')
|
1942
1983
|
r6 = nil
|
1943
1984
|
end
|
1944
1985
|
if r6
|
1945
1986
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
1946
1987
|
r3 = r6
|
1947
1988
|
else
|
1948
|
-
|
1949
|
-
|
1989
|
+
if (match_len = has_terminal?("o", false, index))
|
1990
|
+
r7 = true
|
1991
|
+
@index += match_len
|
1992
|
+
else
|
1993
|
+
terminal_parse_failure('"o"')
|
1994
|
+
r7 = nil
|
1995
|
+
end
|
1996
|
+
if r7
|
1997
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
1998
|
+
r3 = r7
|
1999
|
+
else
|
2000
|
+
@index = i3
|
2001
|
+
r3 = nil
|
2002
|
+
end
|
1950
2003
|
end
|
1951
2004
|
end
|
1952
2005
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
=
|
2
|
+
=.
|
3
|
+
=a
|
4
|
+
=🤔
|
5
|
+
-\
|
6
|
+
-\v
|
7
|
+
-\\\
|
8
|
+
-\\\v
|
9
|
+
-\\\z
|
10
|
+
-\ca
|
11
|
+
=\(\)\*\+\.\?\[\\
|
12
|
+
=\]\{\|\}\t\r\n
|
13
|
+
=a*
|
14
|
+
=🤔*
|
15
|
+
-a**
|
16
|
+
=a|b
|
17
|
+
=aa|bb
|
18
|
+
=aa|
|
19
|
+
=|bb
|
20
|
+
=aa|bb*
|
21
|
+
=aa|bb+
|
22
|
+
=aa|bb?
|
23
|
+
=aa|bb{6}
|
24
|
+
=aa|bb{6,}
|
25
|
+
=aa|bb{6,8}
|
26
|
+
=(aa)
|
27
|
+
-aa)
|
28
|
+
-(aa
|
29
|
+
=aa(bb|cc)dd
|
30
|
+
-aabb|cc)dd
|
31
|
+
-aa(bb|ccdd
|
32
|
+
=[abc]
|
33
|
+
-[a
|
34
|
+
-a]
|
35
|
+
=[a🤔b]
|
36
|
+
=[^abc]
|
37
|
+
=[ab-f]
|
38
|
+
=[^ab-f]
|
39
|
+
=[^-ab-f]
|
40
|
+
=[-ab-f]
|
41
|
+
=[ab-f-]
|
42
|
+
=[a\n-\r-]
|
43
|
+
=\p{Nd}
|
44
|
+
=\P{Ll}
|
45
|
+
=\P{L}
|
46
|
+
=\p{Co}
|
47
|
+
*[\p{Nd}]
|
48
|
+
*[\P{Nd}]
|
49
|
+
*[^\p{Nd}\p{Ll}]
|
@@ -0,0 +1,114 @@
|
|
1
|
+
= ["seq"]
|
2
|
+
""
|
3
|
+
=. ["dot"]
|
4
|
+
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t"
|
5
|
+
=a "a"
|
6
|
+
"a"
|
7
|
+
=🤔 "🤔"
|
8
|
+
"🤔"
|
9
|
+
-\ Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
10
|
+
\
|
11
|
+
~^
|
12
|
+
-\v Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
13
|
+
\v
|
14
|
+
~^
|
15
|
+
-\\\ Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
16
|
+
\\\
|
17
|
+
~~~^
|
18
|
+
-\\\v Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
19
|
+
\\\v
|
20
|
+
~~~^
|
21
|
+
-\\\z Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
22
|
+
\\\z
|
23
|
+
~~~^
|
24
|
+
-\ca Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
25
|
+
\ca
|
26
|
+
~^
|
27
|
+
=\(\)\*\+\.\?\[\\ ["seq", "(", ")", "*", "+", ".", "?", "[", "\\"]
|
28
|
+
"()*+.?[\\"
|
29
|
+
=\]\{\|\}\t\r\n ["seq", "]", "{", "|", "}", "\t", "\r", "\n"]
|
30
|
+
"]{|}\t\r\n"
|
31
|
+
=a* ["rep", 0, false, "a"]
|
32
|
+
"", "a", "aa", "aaa", "aaaa"
|
33
|
+
=🤔* ["rep", 0, false, "🤔"]
|
34
|
+
"", "🤔", "🤔🤔", "🤔🤔🤔", "🤔🤔🤔🤔"
|
35
|
+
-a** Expected one of [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 3 (byte 3):
|
36
|
+
a**
|
37
|
+
~~^
|
38
|
+
=a|b ["alt", "a", "b"]
|
39
|
+
"a", "b"
|
40
|
+
=aa|bb ["alt", ["seq", "a", "a"], ["seq", "b", "b"]]
|
41
|
+
"aa", "bb"
|
42
|
+
=aa| ["alt", ["seq", "a", "a"], ["seq"]]
|
43
|
+
"aa", ""
|
44
|
+
=|bb ["alt", ["seq"], ["seq", "b", "b"]]
|
45
|
+
"", "bb"
|
46
|
+
=aa|bb* ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 0, false, "b"]]]
|
47
|
+
"aa", "b", "bb", "bbb", "bbbb", "bbbbb"
|
48
|
+
=aa|bb+ ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 1, false, "b"]]]
|
49
|
+
"aa", "bb", "bbb", "bbbb", "bbbbb", "bbbbbb"
|
50
|
+
=aa|bb? ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 0, 1, "b"]]]
|
51
|
+
"aa", "b", "bb"
|
52
|
+
=aa|bb{6} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, 6, "b"]]]
|
53
|
+
"aa", "bbbbbbb"
|
54
|
+
=aa|bb{6,} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, false, "b"]]]
|
55
|
+
"aa", "bbbbbbb", "bbbbbbbb", "bbbbbbbbb", "bbbbbbbbbb", "bbbbbbbbbbb"
|
56
|
+
=aa|bb{6,8} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, 8, "b"]]]
|
57
|
+
"aa", "bbbbbbb", "bbbbbbbb", "bbbbbbbbb"
|
58
|
+
=(aa) ["seq", "a", "a"]
|
59
|
+
"aa"
|
60
|
+
-aa) Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 3 (byte 3):
|
61
|
+
aa)
|
62
|
+
~~^
|
63
|
+
-(aa Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|", ")" at line 1, column 4 (byte 4):
|
64
|
+
(aa
|
65
|
+
~~~^
|
66
|
+
=aa(bb|cc)dd ["seq", "a", "a", ["alt", ["seq", "b", "b"], ["seq", "c", "c"]], "d", "d"]
|
67
|
+
"aabbdd", "aaccdd"
|
68
|
+
-aabb|cc)dd Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 8 (byte 8):
|
69
|
+
aabb|cc)dd
|
70
|
+
~~~~~~~^
|
71
|
+
-aa(bb|ccdd Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|", ")" at line 1, column 11 (byte 11):
|
72
|
+
aa(bb|ccdd
|
73
|
+
~~~~~~~~~~^
|
74
|
+
=[abc] ["pos", "a", "b", "c"]
|
75
|
+
"a", "b", "c"
|
76
|
+
-[a Expected one of "-", [\u0000-,], [\.-Z], [\^-], "\\", "\\p{", "\\P{", "]" at line 1, column 3 (byte 3):
|
77
|
+
[a
|
78
|
+
~~^
|
79
|
+
-a] Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 2 (byte 2):
|
80
|
+
a]
|
81
|
+
~^
|
82
|
+
=[a🤔b] ["pos", "a", "🤔", "b"]
|
83
|
+
"a", "🤔", "b"
|
84
|
+
=[^abc] ["neg", "a", "b", "c"]
|
85
|
+
"\n", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v"
|
86
|
+
=[ab-f] ["pos", "a", ["-", "b", "f"]]
|
87
|
+
"a", "b", "c", "d", "e", "f"
|
88
|
+
=[^ab-f] ["neg", "a", ["-", "b", "f"]]
|
89
|
+
"\n", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y"
|
90
|
+
=[^-ab-f] ["neg", "-", "a", ["-", "b", "f"]]
|
91
|
+
"\n", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y"
|
92
|
+
=[-ab-f] ["pos", "-", "a", ["-", "b", "f"]]
|
93
|
+
"-", "a", "b", "c", "d", "e", "f"
|
94
|
+
=[ab-f-] ["pos", "a", ["-", "b", "f"], "-"]
|
95
|
+
"a", "b", "c", "d", "e", "f", "-"
|
96
|
+
=[a\n-\r-] ["pos", "a", ["-", "\n", "\r"], "-"]
|
97
|
+
"a", "\n", "\u000b", "\f", "\r", "-"
|
98
|
+
=\p{Nd} ["p", "Nd"]
|
99
|
+
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "٠", "١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩"
|
100
|
+
=\P{Ll} ["P", "Ll"]
|
101
|
+
"\n", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
|
102
|
+
=\P{L} ["P", "L"]
|
103
|
+
"\n", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "!", "\"", "#", "%", "&", "'", "(", ")", "*"
|
104
|
+
=\p{Co} ["p", "Co"]
|
105
|
+
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
|
106
|
+
*[\p{Nd}] ["pos", ["p", "Nd"]]
|
107
|
+
bug in regexp example generator:
|
108
|
+
"p", "{", "N", "d", "}"
|
109
|
+
*[\P{Nd}] ["pos", ["P", "Nd"]]
|
110
|
+
bug in regexp example generator:
|
111
|
+
"P", "{", "N", "d", "}"
|
112
|
+
*[^\p{Nd}\p{Ll}] ["neg", ["p", "Nd"], ["p", "Ll"]]
|
113
|
+
bug in regexp example generator:
|
114
|
+
"\n", "a", "b", "c", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "q", "r", "s", "t", "u", "v"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iregexp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carsten Bormann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -70,17 +70,17 @@ description: iregexp implements converters and miscellaneous tools for I-Regexp
|
|
70
70
|
email: cabo@tzi.org
|
71
71
|
executables:
|
72
72
|
- iregexp
|
73
|
-
- iregexp~
|
74
73
|
extensions: []
|
75
74
|
extra_rdoc_files: []
|
76
75
|
files:
|
77
76
|
- bin/iregexp
|
78
|
-
- bin/iregexp~
|
79
77
|
- iregexp.gemspec
|
80
78
|
- lib/iregexp.rb
|
81
79
|
- lib/parser/iregexp-util.rb
|
82
80
|
- lib/parser/iregexpgrammar.rb
|
83
81
|
- lib/writer/iregexp-writer.rb
|
82
|
+
- test-data/simple.irl
|
83
|
+
- test-data/simple.out
|
84
84
|
homepage: http://github.com/cabo/iregexp
|
85
85
|
licenses:
|
86
86
|
- MIT
|
data/bin/iregexp~
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'pp'
|
3
|
-
require 'yaml'
|
4
|
-
require 'treetop'
|
5
|
-
require 'json'
|
6
|
-
|
7
|
-
require_relative '../lib/jpt'
|
8
|
-
|
9
|
-
Encoding.default_external = Encoding::UTF_8
|
10
|
-
require 'optparse'
|
11
|
-
require 'ostruct'
|
12
|
-
|
13
|
-
FUNCSIG_CHARS = {"l" => :logical, "n" => :nodes, "v" => :value}
|
14
|
-
|
15
|
-
$options = OpenStruct.new
|
16
|
-
begin
|
17
|
-
op = OptionParser.new do |opts|
|
18
|
-
opts.banner = "Usage: jpt.rb [options] file.jp"
|
19
|
-
|
20
|
-
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
21
|
-
$options.verbose = v
|
22
|
-
end
|
23
|
-
opts.on("-l", "--[no-]lines", "multi-line mode") do |v|
|
24
|
-
$options.lines = v
|
25
|
-
end
|
26
|
-
opts.on("-q", "--[no-]test", "test-file mode") do |v|
|
27
|
-
$options.test = v
|
28
|
-
end
|
29
|
-
opts.on("-fFUNCSIG", "--[no-]f=FUNCSIG", "add function signature name=rppp") do |v|
|
30
|
-
fail "funcsig format must be name=rppp" unless v =~ /\A([a-z][_a-z0-9]*)-([lnv]+)\z/
|
31
|
-
|
32
|
-
JPTType.add_funcsig($1, $2)
|
33
|
-
end
|
34
|
-
opts.on("-tFMT", "--to=FMT", [:basic, :neat, :json, :yaml, :enum, :jp], "Target format") do |v|
|
35
|
-
$options.target = v
|
36
|
-
end
|
37
|
-
end
|
38
|
-
op.parse!
|
39
|
-
rescue Exception => e
|
40
|
-
warn e
|
41
|
-
exit 1
|
42
|
-
end
|
43
|
-
|
44
|
-
if ARGV == []
|
45
|
-
puts op
|
46
|
-
exit 1
|
47
|
-
end
|
48
|
-
jp_file = ARGF.read
|
49
|
-
|
50
|
-
if $options.test
|
51
|
-
argument = query = output = nil
|
52
|
-
jp_file.scan(/((?:^(?:$|[^$=].*)\n)+)|([$].*)|=(.*)|#.*/) do |arg,qy,out|
|
53
|
-
begin
|
54
|
-
if arg
|
55
|
-
argument = JSON.parse(arg)
|
56
|
-
puts
|
57
|
-
puts JSON.dump(argument)
|
58
|
-
elsif qy
|
59
|
-
jpt = JPT.from_jp(qy)
|
60
|
-
output = jpt.apply(argument)
|
61
|
-
print jpt.tree.inspect << " "
|
62
|
-
puts "➔ #{JSON.dump(output)}"
|
63
|
-
elsif out
|
64
|
-
suggested = JSON.parse(out)
|
65
|
-
if output != suggested
|
66
|
-
p [:SUGGESTED, suggested]
|
67
|
-
end
|
68
|
-
end
|
69
|
-
rescue => e
|
70
|
-
warn "*** #{e.detailed_message} #{e.backtrace}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
elsif $options.lines
|
74
|
-
lines = jp_file.lines(chomp: true)
|
75
|
-
col = lines.map(&:length).max
|
76
|
-
form = "%-#{col}s %s"
|
77
|
-
lines.each do |line|
|
78
|
-
jpt = JPT.from_jp(line)
|
79
|
-
result = jpt.tree.inspect
|
80
|
-
puts form % [line, result]
|
81
|
-
end
|
82
|
-
else
|
83
|
-
|
84
|
-
jpt = JPT.from_jp(jp_file)
|
85
|
-
result = jpt.tree
|
86
|
-
|
87
|
-
case $options.target
|
88
|
-
when :basic, nil
|
89
|
-
pp result
|
90
|
-
when :neat, :json
|
91
|
-
require 'neatjson'
|
92
|
-
puts JSON.neat_generate(result, after_comma: 1, after_colon: 1)
|
93
|
-
when :yaml
|
94
|
-
puts result.to_yaml
|
95
|
-
when :jp
|
96
|
-
require_relative '../lib/writer/jp-writer.rb'
|
97
|
-
puts jpt.to_s
|
98
|
-
else
|
99
|
-
warn ["Unknown target format: ", $options.target].inspect
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|