iregexp 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/iregexp +1 -1
- data/iregexp.gemspec +2 -2
- data/lib/iregexp.rb +2 -2
- data/lib/parser/iregexpgrammar.rb +197 -144
- data/test-data/simple.irl +49 -0
- data/test-data/simple.out +114 -0
- metadata +4 -4
- data/bin/iregexp~ +0 -102
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b48ffdc37779e3d0a862960ac7fcfd685de4cf59ef24dbc7556557372c5058c5
|
4
|
+
data.tar.gz: 074b33ad95b11cef1dc154f90eeee325432172d1721da222b08ab178791ae504
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 423cf8699b215e8aa87d6480a15be10d4964e1b51ebe736e721369c0d5a7d499786af1ab600b42d478f1ece5363557cd94c9d7bdc631a28ae7d7d935631eb102
|
7
|
+
data.tar.gz: aa366d615c0e246d50653a41efa1f852caf596db29952c0864c5fa63a3a9f69e545fc3242ff712726555fff97c5cfaa7e46a52de0420dfb302365563e15066a2
|
data/bin/iregexp
CHANGED
@@ -15,7 +15,7 @@ FUNCSIG_CHARS = {"l" => :logical, "n" => :nodes, "v" => :value}
|
|
15
15
|
$options = OpenStruct.new
|
16
16
|
begin
|
17
17
|
op = OptionParser.new do |opts|
|
18
|
-
opts.banner = "Usage:
|
18
|
+
opts.banner = "Usage: iregexp [options] file | -e expr"
|
19
19
|
|
20
20
|
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
21
21
|
$options.verbose = v
|
data/iregexp.gemspec
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "iregexp"
|
3
|
-
s.version = "0.0.
|
3
|
+
s.version = "0.0.3"
|
4
4
|
s.summary = "I-Regexp Tools"
|
5
5
|
s.description = %q{iregexp implements converters and miscellaneous tools for I-Regexp}
|
6
6
|
s.author = "Carsten Bormann"
|
7
7
|
s.email = "cabo@tzi.org"
|
8
8
|
s.license = "MIT"
|
9
9
|
s.homepage = "http://github.com/cabo/iregexp"
|
10
|
-
s.files = Dir['lib/**/*.rb'] + %w(iregexp.gemspec) + Dir['data/*'] + Dir['bin/**/*.rb']
|
10
|
+
s.files = Dir['lib/**/*.rb'] + %w(iregexp.gemspec) + Dir['data/*'] + Dir['test-data/*'] + Dir['bin/**/*.rb']
|
11
11
|
s.executables = Dir['bin/*'].map {|x| File.basename(x)}
|
12
12
|
s.required_ruby_version = '>= 3.0'
|
13
13
|
|
data/lib/iregexp.rb
CHANGED
@@ -16,7 +16,7 @@ class IREGEXP
|
|
16
16
|
reason << line
|
17
17
|
reason << "#{'~' * (parser.failure_column - 1)}^"
|
18
18
|
end
|
19
|
-
reason.join("\n").gsub(/[\u0000-\u0009\u000b
|
19
|
+
reason.join("\n").gsub(/[\u0000-\u0009\u000b-\u001f\u007f]/) {|c| "\\u%04x" % c.ord}
|
20
20
|
end
|
21
21
|
|
22
22
|
SAFE_FN = /\A[-._a-zA-Z0-9]+\z/
|
@@ -24,7 +24,7 @@ class IREGEXP
|
|
24
24
|
def self.from_iregexp(s)
|
25
25
|
ast = @@parser.parse s
|
26
26
|
if !ast
|
27
|
-
fail ParseError.new(self.reason(@@parser, s))
|
27
|
+
fail ParseError.new("\n" << self.reason(@@parser, s))
|
28
28
|
end
|
29
29
|
ret = IREGEXP.new(ast)
|
30
30
|
|
@@ -196,19 +196,6 @@ module IREGEXPGRAMMAR
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
-
module Quantifier1
|
200
|
-
def quantity
|
201
|
-
elements[1]
|
202
|
-
end
|
203
|
-
|
204
|
-
end
|
205
|
-
|
206
|
-
module Quantifier2
|
207
|
-
def ast
|
208
|
-
quantity.ast
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
199
|
def _nt_quantifier
|
213
200
|
start_index = index
|
214
201
|
if node_cache[:quantifier].has_key?(index)
|
@@ -222,11 +209,11 @@ module IREGEXPGRAMMAR
|
|
222
209
|
|
223
210
|
i0 = index
|
224
211
|
i1 = index
|
225
|
-
if
|
212
|
+
if (match_len = has_terminal?("*", false, index))
|
226
213
|
r2 = true
|
227
|
-
@index +=
|
214
|
+
@index += match_len
|
228
215
|
else
|
229
|
-
terminal_parse_failure('
|
216
|
+
terminal_parse_failure('"*"')
|
230
217
|
r2 = nil
|
231
218
|
end
|
232
219
|
if r2
|
@@ -235,11 +222,11 @@ module IREGEXPGRAMMAR
|
|
235
222
|
r1.extend(Quantifier0)
|
236
223
|
r1.extend(Quantifier0)
|
237
224
|
else
|
238
|
-
if (match_len = has_terminal?("
|
225
|
+
if (match_len = has_terminal?("+", false, index))
|
239
226
|
r3 = true
|
240
227
|
@index += match_len
|
241
228
|
else
|
242
|
-
terminal_parse_failure('"
|
229
|
+
terminal_parse_failure('"+"')
|
243
230
|
r3 = nil
|
244
231
|
end
|
245
232
|
if r3
|
@@ -248,48 +235,32 @@ module IREGEXPGRAMMAR
|
|
248
235
|
r1.extend(Quantifier0)
|
249
236
|
r1.extend(Quantifier0)
|
250
237
|
else
|
251
|
-
|
252
|
-
|
238
|
+
if (match_len = has_terminal?("?", false, index))
|
239
|
+
r4 = true
|
240
|
+
@index += match_len
|
241
|
+
else
|
242
|
+
terminal_parse_failure('"?"')
|
243
|
+
r4 = nil
|
244
|
+
end
|
245
|
+
if r4
|
246
|
+
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
247
|
+
r1 = r4
|
248
|
+
r1.extend(Quantifier0)
|
249
|
+
r1.extend(Quantifier0)
|
250
|
+
else
|
251
|
+
@index = i1
|
252
|
+
r1 = nil
|
253
|
+
end
|
253
254
|
end
|
254
255
|
end
|
255
256
|
if r1
|
256
257
|
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
257
258
|
r0 = r1
|
258
259
|
else
|
259
|
-
|
260
|
-
if (match_len = has_terminal?("{", false, index))
|
261
|
-
r5 = true
|
262
|
-
@index += match_len
|
263
|
-
else
|
264
|
-
terminal_parse_failure('"{"')
|
265
|
-
r5 = nil
|
266
|
-
end
|
267
|
-
s4 << r5
|
260
|
+
r5 = _nt_range_quantifier
|
268
261
|
if r5
|
269
|
-
|
270
|
-
|
271
|
-
if r6
|
272
|
-
if (match_len = has_terminal?("}", false, index))
|
273
|
-
r7 = true
|
274
|
-
@index += match_len
|
275
|
-
else
|
276
|
-
terminal_parse_failure('"}"')
|
277
|
-
r7 = nil
|
278
|
-
end
|
279
|
-
s4 << r7
|
280
|
-
end
|
281
|
-
end
|
282
|
-
if s4.last
|
283
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
284
|
-
r4.extend(Quantifier1)
|
285
|
-
r4.extend(Quantifier2)
|
286
|
-
else
|
287
|
-
@index = i4
|
288
|
-
r4 = nil
|
289
|
-
end
|
290
|
-
if r4
|
291
|
-
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
292
|
-
r0 = r4
|
262
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
263
|
+
r0 = r5
|
293
264
|
else
|
294
265
|
@index = i0
|
295
266
|
r0 = nil
|
@@ -301,20 +272,20 @@ module IREGEXPGRAMMAR
|
|
301
272
|
r0
|
302
273
|
end
|
303
274
|
|
304
|
-
module
|
275
|
+
module RangeQuantifier0
|
305
276
|
end
|
306
277
|
|
307
|
-
module
|
278
|
+
module RangeQuantifier1
|
308
279
|
def QuantExact
|
309
|
-
elements[
|
280
|
+
elements[1]
|
310
281
|
end
|
311
282
|
|
312
283
|
end
|
313
284
|
|
314
|
-
module
|
285
|
+
module RangeQuantifier2
|
315
286
|
def ast
|
316
|
-
l = elements[
|
317
|
-
r = if rp = elements[
|
287
|
+
l = elements[1].ast
|
288
|
+
r = if rp = elements[2].elements
|
318
289
|
if rp[1].text_value != ''
|
319
290
|
rp[1].ast
|
320
291
|
else
|
@@ -327,63 +298,83 @@ module IREGEXPGRAMMAR
|
|
327
298
|
end
|
328
299
|
end
|
329
300
|
|
330
|
-
def
|
301
|
+
def _nt_range_quantifier
|
331
302
|
start_index = index
|
332
|
-
if node_cache[:
|
333
|
-
cached = node_cache[:
|
303
|
+
if node_cache[:range_quantifier].has_key?(index)
|
304
|
+
cached = node_cache[:range_quantifier][index]
|
334
305
|
if cached
|
335
|
-
node_cache[:
|
306
|
+
node_cache[:range_quantifier][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
336
307
|
@index = cached.interval.end
|
337
308
|
end
|
338
309
|
return cached
|
339
310
|
end
|
340
311
|
|
341
312
|
i0, s0 = index, []
|
342
|
-
|
313
|
+
if (match_len = has_terminal?("{", false, index))
|
314
|
+
r1 = true
|
315
|
+
@index += match_len
|
316
|
+
else
|
317
|
+
terminal_parse_failure('"{"')
|
318
|
+
r1 = nil
|
319
|
+
end
|
343
320
|
s0 << r1
|
344
321
|
if r1
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
end
|
353
|
-
s3 << r4
|
354
|
-
if r4
|
355
|
-
r6 = _nt_QuantExact
|
356
|
-
if r6
|
357
|
-
r5 = r6
|
322
|
+
r2 = _nt_QuantExact
|
323
|
+
s0 << r2
|
324
|
+
if r2
|
325
|
+
i4, s4 = index, []
|
326
|
+
if (match_len = has_terminal?(",", false, index))
|
327
|
+
r5 = true
|
328
|
+
@index += match_len
|
358
329
|
else
|
359
|
-
|
330
|
+
terminal_parse_failure('","')
|
331
|
+
r5 = nil
|
332
|
+
end
|
333
|
+
s4 << r5
|
334
|
+
if r5
|
335
|
+
r7 = _nt_QuantExact
|
336
|
+
if r7
|
337
|
+
r6 = r7
|
338
|
+
else
|
339
|
+
r6 = instantiate_node(SyntaxNode,input, index...index)
|
340
|
+
end
|
341
|
+
s4 << r6
|
342
|
+
end
|
343
|
+
if s4.last
|
344
|
+
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
345
|
+
r4.extend(RangeQuantifier0)
|
346
|
+
else
|
347
|
+
@index = i4
|
348
|
+
r4 = nil
|
349
|
+
end
|
350
|
+
if r4
|
351
|
+
r3 = r4
|
352
|
+
else
|
353
|
+
r3 = instantiate_node(SyntaxNode,input, index...index)
|
354
|
+
end
|
355
|
+
s0 << r3
|
356
|
+
if r3
|
357
|
+
if (match_len = has_terminal?("}", false, index))
|
358
|
+
r8 = true
|
359
|
+
@index += match_len
|
360
|
+
else
|
361
|
+
terminal_parse_failure('"}"')
|
362
|
+
r8 = nil
|
363
|
+
end
|
364
|
+
s0 << r8
|
360
365
|
end
|
361
|
-
s3 << r5
|
362
|
-
end
|
363
|
-
if s3.last
|
364
|
-
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
365
|
-
r3.extend(Quantity0)
|
366
|
-
else
|
367
|
-
@index = i3
|
368
|
-
r3 = nil
|
369
|
-
end
|
370
|
-
if r3
|
371
|
-
r2 = r3
|
372
|
-
else
|
373
|
-
r2 = instantiate_node(SyntaxNode,input, index...index)
|
374
366
|
end
|
375
|
-
s0 << r2
|
376
367
|
end
|
377
368
|
if s0.last
|
378
369
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
379
|
-
r0.extend(
|
380
|
-
r0.extend(
|
370
|
+
r0.extend(RangeQuantifier1)
|
371
|
+
r0.extend(RangeQuantifier2)
|
381
372
|
else
|
382
373
|
@index = i0
|
383
374
|
r0 = nil
|
384
375
|
end
|
385
376
|
|
386
|
-
node_cache[:
|
377
|
+
node_cache[:range_quantifier][start_index] = r0
|
387
378
|
|
388
379
|
r0
|
389
380
|
end
|
@@ -547,11 +538,11 @@ module IREGEXPGRAMMAR
|
|
547
538
|
r0.extend(NormalChar0)
|
548
539
|
r0.extend(NormalChar0)
|
549
540
|
else
|
550
|
-
if
|
541
|
+
if (match_len = has_terminal?(",", false, index))
|
551
542
|
r2 = true
|
552
|
-
@index +=
|
543
|
+
@index += match_len
|
553
544
|
else
|
554
|
-
terminal_parse_failure('
|
545
|
+
terminal_parse_failure('","')
|
555
546
|
r2 = nil
|
556
547
|
end
|
557
548
|
if r2
|
@@ -560,11 +551,11 @@ module IREGEXPGRAMMAR
|
|
560
551
|
r0.extend(NormalChar0)
|
561
552
|
r0.extend(NormalChar0)
|
562
553
|
else
|
563
|
-
if
|
554
|
+
if (match_len = has_terminal?("-", false, index))
|
564
555
|
r3 = true
|
565
|
-
@index +=
|
556
|
+
@index += match_len
|
566
557
|
else
|
567
|
-
terminal_parse_failure('
|
558
|
+
terminal_parse_failure('"-"')
|
568
559
|
r3 = nil
|
569
560
|
end
|
570
561
|
if r3
|
@@ -573,11 +564,11 @@ module IREGEXPGRAMMAR
|
|
573
564
|
r0.extend(NormalChar0)
|
574
565
|
r0.extend(NormalChar0)
|
575
566
|
else
|
576
|
-
if has_terminal?(@regexps[gr = '\A[
|
567
|
+
if has_terminal?(@regexps[gr = '\A[/->]'] ||= Regexp.new(gr), :regexp, index)
|
577
568
|
r4 = true
|
578
569
|
@index += 1
|
579
570
|
else
|
580
|
-
terminal_parse_failure('[
|
571
|
+
terminal_parse_failure('[/->]')
|
581
572
|
r4 = nil
|
582
573
|
end
|
583
574
|
if r4
|
@@ -586,11 +577,11 @@ module IREGEXPGRAMMAR
|
|
586
577
|
r0.extend(NormalChar0)
|
587
578
|
r0.extend(NormalChar0)
|
588
579
|
else
|
589
|
-
if has_terminal?(@regexps[gr = '\A[
|
580
|
+
if has_terminal?(@regexps[gr = '\A[@-Z]'] ||= Regexp.new(gr), :regexp, index)
|
590
581
|
r5 = true
|
591
582
|
@index += 1
|
592
583
|
else
|
593
|
-
terminal_parse_failure('[
|
584
|
+
terminal_parse_failure('[@-Z]')
|
594
585
|
r5 = nil
|
595
586
|
end
|
596
587
|
if r5
|
@@ -599,11 +590,11 @@ module IREGEXPGRAMMAR
|
|
599
590
|
r0.extend(NormalChar0)
|
600
591
|
r0.extend(NormalChar0)
|
601
592
|
else
|
602
|
-
if has_terminal?(@regexps[gr = '\A[
|
593
|
+
if has_terminal?(@regexps[gr = '\A[\\^-z]'] ||= Regexp.new(gr), :regexp, index)
|
603
594
|
r6 = true
|
604
595
|
@index += 1
|
605
596
|
else
|
606
|
-
terminal_parse_failure('[
|
597
|
+
terminal_parse_failure('[\\^-z]')
|
607
598
|
r6 = nil
|
608
599
|
end
|
609
600
|
if r6
|
@@ -612,8 +603,22 @@ module IREGEXPGRAMMAR
|
|
612
603
|
r0.extend(NormalChar0)
|
613
604
|
r0.extend(NormalChar0)
|
614
605
|
else
|
615
|
-
@
|
616
|
-
|
606
|
+
if has_terminal?(@regexps[gr = '\A[~-]'] ||= Regexp.new(gr), :regexp, index)
|
607
|
+
r7 = true
|
608
|
+
@index += 1
|
609
|
+
else
|
610
|
+
terminal_parse_failure('[~-]')
|
611
|
+
r7 = nil
|
612
|
+
end
|
613
|
+
if r7
|
614
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
615
|
+
r0 = r7
|
616
|
+
r0.extend(NormalChar0)
|
617
|
+
r0.extend(NormalChar0)
|
618
|
+
else
|
619
|
+
@index = i0
|
620
|
+
r0 = nil
|
621
|
+
end
|
617
622
|
end
|
618
623
|
end
|
619
624
|
end
|
@@ -726,85 +731,97 @@ module IREGEXPGRAMMAR
|
|
726
731
|
r3 = SyntaxNode.new(input, (index-1)...index) if r3 == true
|
727
732
|
r2 = r3
|
728
733
|
else
|
729
|
-
if
|
734
|
+
if (match_len = has_terminal?("-", false, index))
|
730
735
|
r4 = true
|
731
|
-
@index +=
|
736
|
+
@index += match_len
|
732
737
|
else
|
733
|
-
terminal_parse_failure('
|
738
|
+
terminal_parse_failure('"-"')
|
734
739
|
r4 = nil
|
735
740
|
end
|
736
741
|
if r4
|
737
742
|
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
738
743
|
r2 = r4
|
739
744
|
else
|
740
|
-
if (match_len = has_terminal?("
|
745
|
+
if (match_len = has_terminal?(".", false, index))
|
741
746
|
r5 = true
|
742
747
|
@index += match_len
|
743
748
|
else
|
744
|
-
terminal_parse_failure('"
|
749
|
+
terminal_parse_failure('"."')
|
745
750
|
r5 = nil
|
746
751
|
end
|
747
752
|
if r5
|
748
753
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
749
754
|
r2 = r5
|
750
755
|
else
|
751
|
-
if
|
756
|
+
if (match_len = has_terminal?("?", false, index))
|
752
757
|
r6 = true
|
753
|
-
@index +=
|
758
|
+
@index += match_len
|
754
759
|
else
|
755
|
-
terminal_parse_failure('
|
760
|
+
terminal_parse_failure('"?"')
|
756
761
|
r6 = nil
|
757
762
|
end
|
758
763
|
if r6
|
759
764
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
760
765
|
r2 = r6
|
761
766
|
else
|
762
|
-
if (
|
767
|
+
if has_terminal?(@regexps[gr = '\A[\\[-\\^]'] ||= Regexp.new(gr), :regexp, index)
|
763
768
|
r7 = true
|
764
|
-
@index +=
|
769
|
+
@index += 1
|
765
770
|
else
|
766
|
-
terminal_parse_failure('
|
771
|
+
terminal_parse_failure('[\\[-\\^]')
|
767
772
|
r7 = nil
|
768
773
|
end
|
769
774
|
if r7
|
770
775
|
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
771
776
|
r2 = r7
|
772
777
|
else
|
773
|
-
if (match_len = has_terminal?("
|
778
|
+
if (match_len = has_terminal?("n", false, index))
|
774
779
|
r8 = true
|
775
780
|
@index += match_len
|
776
781
|
else
|
777
|
-
terminal_parse_failure('"
|
782
|
+
terminal_parse_failure('"n"')
|
778
783
|
r8 = nil
|
779
784
|
end
|
780
785
|
if r8
|
781
786
|
r8 = SyntaxNode.new(input, (index-1)...index) if r8 == true
|
782
787
|
r2 = r8
|
783
788
|
else
|
784
|
-
if (match_len = has_terminal?("
|
789
|
+
if (match_len = has_terminal?("r", false, index))
|
785
790
|
r9 = true
|
786
791
|
@index += match_len
|
787
792
|
else
|
788
|
-
terminal_parse_failure('"
|
793
|
+
terminal_parse_failure('"r"')
|
789
794
|
r9 = nil
|
790
795
|
end
|
791
796
|
if r9
|
792
797
|
r9 = SyntaxNode.new(input, (index-1)...index) if r9 == true
|
793
798
|
r2 = r9
|
794
799
|
else
|
795
|
-
if
|
800
|
+
if (match_len = has_terminal?("t", false, index))
|
796
801
|
r10 = true
|
797
|
-
@index +=
|
802
|
+
@index += match_len
|
798
803
|
else
|
799
|
-
terminal_parse_failure('
|
804
|
+
terminal_parse_failure('"t"')
|
800
805
|
r10 = nil
|
801
806
|
end
|
802
807
|
if r10
|
803
808
|
r10 = SyntaxNode.new(input, (index-1)...index) if r10 == true
|
804
809
|
r2 = r10
|
805
810
|
else
|
806
|
-
@
|
807
|
-
|
811
|
+
if has_terminal?(@regexps[gr = '\A[\\{-\\}]'] ||= Regexp.new(gr), :regexp, index)
|
812
|
+
r11 = true
|
813
|
+
@index += 1
|
814
|
+
else
|
815
|
+
terminal_parse_failure('[\\{-\\}]')
|
816
|
+
r11 = nil
|
817
|
+
end
|
818
|
+
if r11
|
819
|
+
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
820
|
+
r2 = r11
|
821
|
+
else
|
822
|
+
@index = i2
|
823
|
+
r2 = nil
|
824
|
+
end
|
808
825
|
end
|
809
826
|
end
|
810
827
|
end
|
@@ -1390,41 +1407,65 @@ module IREGEXPGRAMMAR
|
|
1390
1407
|
s0 << r1
|
1391
1408
|
if r1
|
1392
1409
|
i3 = index
|
1393
|
-
if
|
1410
|
+
if (match_len = has_terminal?("l", false, index))
|
1394
1411
|
r4 = true
|
1395
|
-
@index +=
|
1412
|
+
@index += match_len
|
1396
1413
|
else
|
1397
|
-
terminal_parse_failure('
|
1414
|
+
terminal_parse_failure('"l"')
|
1398
1415
|
r4 = nil
|
1399
1416
|
end
|
1400
1417
|
if r4
|
1401
1418
|
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
1402
1419
|
r3 = r4
|
1403
1420
|
else
|
1404
|
-
if (match_len = has_terminal?("
|
1421
|
+
if (match_len = has_terminal?("m", false, index))
|
1405
1422
|
r5 = true
|
1406
1423
|
@index += match_len
|
1407
1424
|
else
|
1408
|
-
terminal_parse_failure('"
|
1425
|
+
terminal_parse_failure('"m"')
|
1409
1426
|
r5 = nil
|
1410
1427
|
end
|
1411
1428
|
if r5
|
1412
1429
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
1413
1430
|
r3 = r5
|
1414
1431
|
else
|
1415
|
-
if
|
1432
|
+
if (match_len = has_terminal?("o", false, index))
|
1416
1433
|
r6 = true
|
1417
|
-
@index +=
|
1434
|
+
@index += match_len
|
1418
1435
|
else
|
1419
|
-
terminal_parse_failure('
|
1436
|
+
terminal_parse_failure('"o"')
|
1420
1437
|
r6 = nil
|
1421
1438
|
end
|
1422
1439
|
if r6
|
1423
1440
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
1424
1441
|
r3 = r6
|
1425
1442
|
else
|
1426
|
-
|
1427
|
-
|
1443
|
+
if (match_len = has_terminal?("t", false, index))
|
1444
|
+
r7 = true
|
1445
|
+
@index += match_len
|
1446
|
+
else
|
1447
|
+
terminal_parse_failure('"t"')
|
1448
|
+
r7 = nil
|
1449
|
+
end
|
1450
|
+
if r7
|
1451
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
1452
|
+
r3 = r7
|
1453
|
+
else
|
1454
|
+
if (match_len = has_terminal?("u", false, index))
|
1455
|
+
r8 = true
|
1456
|
+
@index += match_len
|
1457
|
+
else
|
1458
|
+
terminal_parse_failure('"u"')
|
1459
|
+
r8 = nil
|
1460
|
+
end
|
1461
|
+
if r8
|
1462
|
+
r8 = SyntaxNode.new(input, (index-1)...index) if r8 == true
|
1463
|
+
r3 = r8
|
1464
|
+
else
|
1465
|
+
@index = i3
|
1466
|
+
r3 = nil
|
1467
|
+
end
|
1468
|
+
end
|
1428
1469
|
end
|
1429
1470
|
end
|
1430
1471
|
end
|
@@ -1934,19 +1975,31 @@ module IREGEXPGRAMMAR
|
|
1934
1975
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
1935
1976
|
r3 = r5
|
1936
1977
|
else
|
1937
|
-
if
|
1978
|
+
if (match_len = has_terminal?("n", false, index))
|
1938
1979
|
r6 = true
|
1939
|
-
@index +=
|
1980
|
+
@index += match_len
|
1940
1981
|
else
|
1941
|
-
terminal_parse_failure('
|
1982
|
+
terminal_parse_failure('"n"')
|
1942
1983
|
r6 = nil
|
1943
1984
|
end
|
1944
1985
|
if r6
|
1945
1986
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
1946
1987
|
r3 = r6
|
1947
1988
|
else
|
1948
|
-
|
1949
|
-
|
1989
|
+
if (match_len = has_terminal?("o", false, index))
|
1990
|
+
r7 = true
|
1991
|
+
@index += match_len
|
1992
|
+
else
|
1993
|
+
terminal_parse_failure('"o"')
|
1994
|
+
r7 = nil
|
1995
|
+
end
|
1996
|
+
if r7
|
1997
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
1998
|
+
r3 = r7
|
1999
|
+
else
|
2000
|
+
@index = i3
|
2001
|
+
r3 = nil
|
2002
|
+
end
|
1950
2003
|
end
|
1951
2004
|
end
|
1952
2005
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
=
|
2
|
+
=.
|
3
|
+
=a
|
4
|
+
=🤔
|
5
|
+
-\
|
6
|
+
-\v
|
7
|
+
-\\\
|
8
|
+
-\\\v
|
9
|
+
-\\\z
|
10
|
+
-\ca
|
11
|
+
=\(\)\*\+\.\?\[\\
|
12
|
+
=\]\{\|\}\t\r\n
|
13
|
+
=a*
|
14
|
+
=🤔*
|
15
|
+
-a**
|
16
|
+
=a|b
|
17
|
+
=aa|bb
|
18
|
+
=aa|
|
19
|
+
=|bb
|
20
|
+
=aa|bb*
|
21
|
+
=aa|bb+
|
22
|
+
=aa|bb?
|
23
|
+
=aa|bb{6}
|
24
|
+
=aa|bb{6,}
|
25
|
+
=aa|bb{6,8}
|
26
|
+
=(aa)
|
27
|
+
-aa)
|
28
|
+
-(aa
|
29
|
+
=aa(bb|cc)dd
|
30
|
+
-aabb|cc)dd
|
31
|
+
-aa(bb|ccdd
|
32
|
+
=[abc]
|
33
|
+
-[a
|
34
|
+
-a]
|
35
|
+
=[a🤔b]
|
36
|
+
=[^abc]
|
37
|
+
=[ab-f]
|
38
|
+
=[^ab-f]
|
39
|
+
=[^-ab-f]
|
40
|
+
=[-ab-f]
|
41
|
+
=[ab-f-]
|
42
|
+
=[a\n-\r-]
|
43
|
+
=\p{Nd}
|
44
|
+
=\P{Ll}
|
45
|
+
=\P{L}
|
46
|
+
=\p{Co}
|
47
|
+
*[\p{Nd}]
|
48
|
+
*[\P{Nd}]
|
49
|
+
*[^\p{Nd}\p{Ll}]
|
@@ -0,0 +1,114 @@
|
|
1
|
+
= ["seq"]
|
2
|
+
""
|
3
|
+
=. ["dot"]
|
4
|
+
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t"
|
5
|
+
=a "a"
|
6
|
+
"a"
|
7
|
+
=🤔 "🤔"
|
8
|
+
"🤔"
|
9
|
+
-\ Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
10
|
+
\
|
11
|
+
~^
|
12
|
+
-\v Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
13
|
+
\v
|
14
|
+
~^
|
15
|
+
-\\\ Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
16
|
+
\\\
|
17
|
+
~~~^
|
18
|
+
-\\\v Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
19
|
+
\\\v
|
20
|
+
~~~^
|
21
|
+
-\\\z Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 4 (byte 4):
|
22
|
+
\\\z
|
23
|
+
~~~^
|
24
|
+
-\ca Expected one of [\(-\+], "-", ".", "?", [\[-\^], "n", "r", "t", [\{-\}] at line 1, column 2 (byte 2):
|
25
|
+
\ca
|
26
|
+
~^
|
27
|
+
=\(\)\*\+\.\?\[\\ ["seq", "(", ")", "*", "+", ".", "?", "[", "\\"]
|
28
|
+
"()*+.?[\\"
|
29
|
+
=\]\{\|\}\t\r\n ["seq", "]", "{", "|", "}", "\t", "\r", "\n"]
|
30
|
+
"]{|}\t\r\n"
|
31
|
+
=a* ["rep", 0, false, "a"]
|
32
|
+
"", "a", "aa", "aaa", "aaaa"
|
33
|
+
=🤔* ["rep", 0, false, "🤔"]
|
34
|
+
"", "🤔", "🤔🤔", "🤔🤔🤔", "🤔🤔🤔🤔"
|
35
|
+
-a** Expected one of [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 3 (byte 3):
|
36
|
+
a**
|
37
|
+
~~^
|
38
|
+
=a|b ["alt", "a", "b"]
|
39
|
+
"a", "b"
|
40
|
+
=aa|bb ["alt", ["seq", "a", "a"], ["seq", "b", "b"]]
|
41
|
+
"aa", "bb"
|
42
|
+
=aa| ["alt", ["seq", "a", "a"], ["seq"]]
|
43
|
+
"aa", ""
|
44
|
+
=|bb ["alt", ["seq"], ["seq", "b", "b"]]
|
45
|
+
"", "bb"
|
46
|
+
=aa|bb* ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 0, false, "b"]]]
|
47
|
+
"aa", "b", "bb", "bbb", "bbbb", "bbbbb"
|
48
|
+
=aa|bb+ ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 1, false, "b"]]]
|
49
|
+
"aa", "bb", "bbb", "bbbb", "bbbbb", "bbbbbb"
|
50
|
+
=aa|bb? ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 0, 1, "b"]]]
|
51
|
+
"aa", "b", "bb"
|
52
|
+
=aa|bb{6} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, 6, "b"]]]
|
53
|
+
"aa", "bbbbbbb"
|
54
|
+
=aa|bb{6,} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, false, "b"]]]
|
55
|
+
"aa", "bbbbbbb", "bbbbbbbb", "bbbbbbbbb", "bbbbbbbbbb", "bbbbbbbbbbb"
|
56
|
+
=aa|bb{6,8} ["alt", ["seq", "a", "a"], ["seq", "b", ["rep", 6, 8, "b"]]]
|
57
|
+
"aa", "bbbbbbb", "bbbbbbbb", "bbbbbbbbb"
|
58
|
+
=(aa) ["seq", "a", "a"]
|
59
|
+
"aa"
|
60
|
+
-aa) Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 3 (byte 3):
|
61
|
+
aa)
|
62
|
+
~~^
|
63
|
+
-(aa Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|", ")" at line 1, column 4 (byte 4):
|
64
|
+
(aa
|
65
|
+
~~~^
|
66
|
+
=aa(bb|cc)dd ["seq", "a", "a", ["alt", ["seq", "b", "b"], ["seq", "c", "c"]], "d", "d"]
|
67
|
+
"aabbdd", "aaccdd"
|
68
|
+
-aabb|cc)dd Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 8 (byte 8):
|
69
|
+
aabb|cc)dd
|
70
|
+
~~~~~~~^
|
71
|
+
-aa(bb|ccdd Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|", ")" at line 1, column 11 (byte 11):
|
72
|
+
aa(bb|ccdd
|
73
|
+
~~~~~~~~~~^
|
74
|
+
=[abc] ["pos", "a", "b", "c"]
|
75
|
+
"a", "b", "c"
|
76
|
+
-[a Expected one of "-", [\u0000-,], [\.-Z], [\^-], "\\", "\\p{", "\\P{", "]" at line 1, column 3 (byte 3):
|
77
|
+
[a
|
78
|
+
~~^
|
79
|
+
-a] Expected one of "*", "+", "?", "{", [\u0000-'], ",", "-", [/->], [@-Z], [\^-z], [~-], ".", "\\", "\\p{", "\\P{", "[", "(", "|" at line 1, column 2 (byte 2):
|
80
|
+
a]
|
81
|
+
~^
|
82
|
+
=[a🤔b] ["pos", "a", "🤔", "b"]
|
83
|
+
"a", "🤔", "b"
|
84
|
+
=[^abc] ["neg", "a", "b", "c"]
|
85
|
+
"\n", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v"
|
86
|
+
=[ab-f] ["pos", "a", ["-", "b", "f"]]
|
87
|
+
"a", "b", "c", "d", "e", "f"
|
88
|
+
=[^ab-f] ["neg", "a", ["-", "b", "f"]]
|
89
|
+
"\n", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y"
|
90
|
+
=[^-ab-f] ["neg", "-", "a", ["-", "b", "f"]]
|
91
|
+
"\n", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y"
|
92
|
+
=[-ab-f] ["pos", "-", "a", ["-", "b", "f"]]
|
93
|
+
"-", "a", "b", "c", "d", "e", "f"
|
94
|
+
=[ab-f-] ["pos", "a", ["-", "b", "f"], "-"]
|
95
|
+
"a", "b", "c", "d", "e", "f", "-"
|
96
|
+
=[a\n-\r-] ["pos", "a", ["-", "\n", "\r"], "-"]
|
97
|
+
"a", "\n", "\u000b", "\f", "\r", "-"
|
98
|
+
=\p{Nd} ["p", "Nd"]
|
99
|
+
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "٠", "١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩"
|
100
|
+
=\P{Ll} ["P", "Ll"]
|
101
|
+
"\n", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
|
102
|
+
=\P{L} ["P", "L"]
|
103
|
+
"\n", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "!", "\"", "#", "%", "&", "'", "(", ")", "*"
|
104
|
+
=\p{Co} ["p", "Co"]
|
105
|
+
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
|
106
|
+
*[\p{Nd}] ["pos", ["p", "Nd"]]
|
107
|
+
bug in regexp example generator:
|
108
|
+
"p", "{", "N", "d", "}"
|
109
|
+
*[\P{Nd}] ["pos", ["P", "Nd"]]
|
110
|
+
bug in regexp example generator:
|
111
|
+
"P", "{", "N", "d", "}"
|
112
|
+
*[^\p{Nd}\p{Ll}] ["neg", ["p", "Nd"], ["p", "Ll"]]
|
113
|
+
bug in regexp example generator:
|
114
|
+
"\n", "a", "b", "c", "e", "f", "g", "h", "i", "j", "k", "m", "n", "o", "q", "r", "s", "t", "u", "v"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iregexp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carsten Bormann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -70,17 +70,17 @@ description: iregexp implements converters and miscellaneous tools for I-Regexp
|
|
70
70
|
email: cabo@tzi.org
|
71
71
|
executables:
|
72
72
|
- iregexp
|
73
|
-
- iregexp~
|
74
73
|
extensions: []
|
75
74
|
extra_rdoc_files: []
|
76
75
|
files:
|
77
76
|
- bin/iregexp
|
78
|
-
- bin/iregexp~
|
79
77
|
- iregexp.gemspec
|
80
78
|
- lib/iregexp.rb
|
81
79
|
- lib/parser/iregexp-util.rb
|
82
80
|
- lib/parser/iregexpgrammar.rb
|
83
81
|
- lib/writer/iregexp-writer.rb
|
82
|
+
- test-data/simple.irl
|
83
|
+
- test-data/simple.out
|
84
84
|
homepage: http://github.com/cabo/iregexp
|
85
85
|
licenses:
|
86
86
|
- MIT
|
data/bin/iregexp~
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'pp'
|
3
|
-
require 'yaml'
|
4
|
-
require 'treetop'
|
5
|
-
require 'json'
|
6
|
-
|
7
|
-
require_relative '../lib/jpt'
|
8
|
-
|
9
|
-
Encoding.default_external = Encoding::UTF_8
|
10
|
-
require 'optparse'
|
11
|
-
require 'ostruct'
|
12
|
-
|
13
|
-
FUNCSIG_CHARS = {"l" => :logical, "n" => :nodes, "v" => :value}
|
14
|
-
|
15
|
-
$options = OpenStruct.new
|
16
|
-
begin
|
17
|
-
op = OptionParser.new do |opts|
|
18
|
-
opts.banner = "Usage: jpt.rb [options] file.jp"
|
19
|
-
|
20
|
-
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
|
21
|
-
$options.verbose = v
|
22
|
-
end
|
23
|
-
opts.on("-l", "--[no-]lines", "multi-line mode") do |v|
|
24
|
-
$options.lines = v
|
25
|
-
end
|
26
|
-
opts.on("-q", "--[no-]test", "test-file mode") do |v|
|
27
|
-
$options.test = v
|
28
|
-
end
|
29
|
-
opts.on("-fFUNCSIG", "--[no-]f=FUNCSIG", "add function signature name=rppp") do |v|
|
30
|
-
fail "funcsig format must be name=rppp" unless v =~ /\A([a-z][_a-z0-9]*)-([lnv]+)\z/
|
31
|
-
|
32
|
-
JPTType.add_funcsig($1, $2)
|
33
|
-
end
|
34
|
-
opts.on("-tFMT", "--to=FMT", [:basic, :neat, :json, :yaml, :enum, :jp], "Target format") do |v|
|
35
|
-
$options.target = v
|
36
|
-
end
|
37
|
-
end
|
38
|
-
op.parse!
|
39
|
-
rescue Exception => e
|
40
|
-
warn e
|
41
|
-
exit 1
|
42
|
-
end
|
43
|
-
|
44
|
-
if ARGV == []
|
45
|
-
puts op
|
46
|
-
exit 1
|
47
|
-
end
|
48
|
-
jp_file = ARGF.read
|
49
|
-
|
50
|
-
if $options.test
|
51
|
-
argument = query = output = nil
|
52
|
-
jp_file.scan(/((?:^(?:$|[^$=].*)\n)+)|([$].*)|=(.*)|#.*/) do |arg,qy,out|
|
53
|
-
begin
|
54
|
-
if arg
|
55
|
-
argument = JSON.parse(arg)
|
56
|
-
puts
|
57
|
-
puts JSON.dump(argument)
|
58
|
-
elsif qy
|
59
|
-
jpt = JPT.from_jp(qy)
|
60
|
-
output = jpt.apply(argument)
|
61
|
-
print jpt.tree.inspect << " "
|
62
|
-
puts "➔ #{JSON.dump(output)}"
|
63
|
-
elsif out
|
64
|
-
suggested = JSON.parse(out)
|
65
|
-
if output != suggested
|
66
|
-
p [:SUGGESTED, suggested]
|
67
|
-
end
|
68
|
-
end
|
69
|
-
rescue => e
|
70
|
-
warn "*** #{e.detailed_message} #{e.backtrace}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
elsif $options.lines
|
74
|
-
lines = jp_file.lines(chomp: true)
|
75
|
-
col = lines.map(&:length).max
|
76
|
-
form = "%-#{col}s %s"
|
77
|
-
lines.each do |line|
|
78
|
-
jpt = JPT.from_jp(line)
|
79
|
-
result = jpt.tree.inspect
|
80
|
-
puts form % [line, result]
|
81
|
-
end
|
82
|
-
else
|
83
|
-
|
84
|
-
jpt = JPT.from_jp(jp_file)
|
85
|
-
result = jpt.tree
|
86
|
-
|
87
|
-
case $options.target
|
88
|
-
when :basic, nil
|
89
|
-
pp result
|
90
|
-
when :neat, :json
|
91
|
-
require 'neatjson'
|
92
|
-
puts JSON.neat_generate(result, after_comma: 1, after_colon: 1)
|
93
|
-
when :yaml
|
94
|
-
puts result.to_yaml
|
95
|
-
when :jp
|
96
|
-
require_relative '../lib/writer/jp-writer.rb'
|
97
|
-
puts jpt.to_s
|
98
|
-
else
|
99
|
-
warn ["Unknown target format: ", $options.target].inspect
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|