ruby_grammar_builder 1.1.10 → 1.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ruby_grammar_builder/pattern_extensions/or_pattern.rb +54 -0
- data/lib/ruby_grammar_builder/pattern_variations/base_pattern.rb +41 -27
- data/lib/ruby_grammar_builder/pattern_variations/repeatable_pattern.rb +9 -7
- data/lib/ruby_grammar_builder/transforms/resolve_placeholders.rb +12 -75
- data/lib/ruby_grammar_builder/util.rb +5 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11a8a1b576e0b4aa298d5216fab27ecafbdfb89bdbcbe1604699b17f944e6773
|
4
|
+
data.tar.gz: 705f94bbb89097d41510042c581bab138bfbcb74bd703e540df429524547c6fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3121cb4e1efddf789f32c6c79a77ebd35bee9417ba654f33d6d99b4672ebffc09d4f54254826477f44ac75b38ff06c3efab0a1e73c73e56627021bc7c8e4411
|
7
|
+
data.tar.gz: 20de057ae05b377087bcbe77e3d42ddd12069640538e98a0507d05d90ffe75c2092be09820d9b7341327294ff86d4671896aeea21be4007d59954dee76196a45
|
@@ -14,7 +14,61 @@ class OrPattern < PatternBase
|
|
14
14
|
def evaluate_operator
|
15
15
|
AlternationOperator.new
|
16
16
|
end
|
17
|
+
|
18
|
+
def run_self_tests
|
19
|
+
pass = [true]
|
17
20
|
|
21
|
+
# some patterns are not able to be evaluated
|
22
|
+
# do not attempt to unless required
|
23
|
+
return true unless [
|
24
|
+
:should_fully_match,
|
25
|
+
:should_not_fully_match,
|
26
|
+
:should_partially_match,
|
27
|
+
:should_not_partially_match,
|
28
|
+
].any? { |k| @arguments.include? k }
|
29
|
+
|
30
|
+
copy = @match.__deep_clone_self__
|
31
|
+
test_regex = copy.to_r
|
32
|
+
test_fully_regex = wrap_with_anchors(copy).to_r
|
33
|
+
|
34
|
+
warn = lambda do |symbol|
|
35
|
+
puts [
|
36
|
+
"",
|
37
|
+
"When testing the pattern #{test_regex.inspect}. The unit test for #{symbol} failed.",
|
38
|
+
"The unit test has the following patterns:",
|
39
|
+
"#{@arguments[symbol].to_yaml}",
|
40
|
+
"The Failing pattern is below:",
|
41
|
+
"#{self}",
|
42
|
+
].join("\n")
|
43
|
+
end
|
44
|
+
if @arguments[:should_fully_match].is_a? Array
|
45
|
+
unless @arguments[:should_fully_match].all? { |test| test =~ test_fully_regex }
|
46
|
+
warn.call :should_fully_match
|
47
|
+
pass << false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
if @arguments[:should_not_fully_match].is_a? Array
|
51
|
+
unless @arguments[:should_not_fully_match].none? { |test| test =~ test_fully_regex }
|
52
|
+
warn.call :should_not_fully_match
|
53
|
+
pass << false
|
54
|
+
end
|
55
|
+
end
|
56
|
+
if @arguments[:should_partially_match].is_a? Array
|
57
|
+
unless @arguments[:should_partially_match].all? { |test| test =~ test_regex }
|
58
|
+
warn.call :should_partially_match
|
59
|
+
pass << false
|
60
|
+
end
|
61
|
+
end
|
62
|
+
if @arguments[:should_not_partially_match].is_a? Array
|
63
|
+
unless @arguments[:should_not_partially_match].none? { |test| test =~ test_regex }
|
64
|
+
warn.call :should_not_partially_match
|
65
|
+
pass << false
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
pass.none?(&:!)
|
70
|
+
end
|
71
|
+
|
18
72
|
#
|
19
73
|
# Raises an error to prevent use as initial type
|
20
74
|
#
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
$ruby_grammar_builder__unit_test_active = false
|
3
3
|
#
|
4
4
|
# Provides a base class to simplify the writing of complex regular expressions rules
|
5
5
|
# This class completely handles capture numbers and provides convenience methods for
|
@@ -42,7 +42,7 @@ class PatternBase
|
|
42
42
|
# @return [Boolean] can this capture become capture group 0
|
43
43
|
#
|
44
44
|
def optimize_outer_group?
|
45
|
-
needs_to_capture? and @next_pattern.nil?
|
45
|
+
self.needs_to_capture? and @next_pattern.nil?
|
46
46
|
end
|
47
47
|
|
48
48
|
#
|
@@ -73,18 +73,6 @@ class PatternBase
|
|
73
73
|
new_pattern.insert!(pattern).freeze
|
74
74
|
end
|
75
75
|
|
76
|
-
#
|
77
|
-
# Adds a capture group if needed
|
78
|
-
#
|
79
|
-
# @param [String] regex_as_string the pattern as a string
|
80
|
-
#
|
81
|
-
# @return [String] the pattern, potentially with a capture group
|
82
|
-
#
|
83
|
-
def add_capture_group_if_needed(regex_as_string)
|
84
|
-
regex_as_string = "(#{regex_as_string})" if needs_to_capture?
|
85
|
-
regex_as_string
|
86
|
-
end
|
87
|
-
|
88
76
|
#
|
89
77
|
# Uses a block to transform all Patterns in the list
|
90
78
|
#
|
@@ -306,7 +294,7 @@ class PatternBase
|
|
306
294
|
#
|
307
295
|
def to_tag
|
308
296
|
output = {
|
309
|
-
match: evaluate,
|
297
|
+
match: self.evaluate(),
|
310
298
|
}
|
311
299
|
|
312
300
|
output[:captures] = convert_group_attributes_to_captures(collect_group_attributes)
|
@@ -415,6 +403,8 @@ class PatternBase
|
|
415
403
|
# @return [Boolean] If all test passed return true, otherwise false
|
416
404
|
#
|
417
405
|
def run_tests
|
406
|
+
original_flag_value = $ruby_grammar_builder__unit_test_active
|
407
|
+
$ruby_grammar_builder__unit_test_active = true
|
418
408
|
pass = [
|
419
409
|
run_self_tests,
|
420
410
|
]
|
@@ -427,6 +417,7 @@ class PatternBase
|
|
427
417
|
elsif @arguments[:includes].is_a? PatternBase
|
428
418
|
pass << @arguments[:includes].run_tests
|
429
419
|
end
|
420
|
+
$ruby_grammar_builder__unit_test_active = original_flag_value
|
430
421
|
pass.none?(&:!)
|
431
422
|
end
|
432
423
|
|
@@ -448,8 +439,17 @@ class PatternBase
|
|
448
439
|
].any? { |k| @arguments.include? k }
|
449
440
|
|
450
441
|
copy = __deep_clone_self__
|
451
|
-
|
452
|
-
|
442
|
+
begin
|
443
|
+
test_regex = copy.to_r
|
444
|
+
test_fully_regex = wrap_with_anchors(copy).to_r
|
445
|
+
rescue => exception
|
446
|
+
raise <<~HEREDOC
|
447
|
+
|
448
|
+
|
449
|
+
error running unit tests for: #{copy}
|
450
|
+
#{exception}
|
451
|
+
HEREDOC
|
452
|
+
end
|
453
453
|
|
454
454
|
warn = lambda do |symbol|
|
455
455
|
puts [
|
@@ -558,7 +558,12 @@ class PatternBase
|
|
558
558
|
def do_evaluate_self(groups)
|
559
559
|
match = @match
|
560
560
|
match = match.evaluate(groups) if match.is_a? PatternBase
|
561
|
-
|
561
|
+
if self.needs_to_capture?
|
562
|
+
match = "(#{match})"
|
563
|
+
elsif not string_single_entity?(match)
|
564
|
+
match = "(?:#{match})"
|
565
|
+
end
|
566
|
+
return match
|
562
567
|
end
|
563
568
|
|
564
569
|
#
|
@@ -599,7 +604,7 @@ class PatternBase
|
|
599
604
|
|
600
605
|
# (see string_single_entity)
|
601
606
|
def single_entity?
|
602
|
-
string_single_entity? evaluate
|
607
|
+
return string_single_entity?( self.evaluate() )
|
603
608
|
end
|
604
609
|
|
605
610
|
# does this pattern contain no capturing groups
|
@@ -684,7 +689,7 @@ class PatternBase
|
|
684
689
|
#
|
685
690
|
def do_collect_self_groups(next_group)
|
686
691
|
groups = []
|
687
|
-
groups << {group: next_group}.merge(@arguments) if needs_to_capture?
|
692
|
+
groups << {group: next_group}.merge(@arguments) if self.needs_to_capture?
|
688
693
|
groups
|
689
694
|
end
|
690
695
|
|
@@ -719,22 +724,31 @@ class PatternBase
|
|
719
724
|
self_regex = self_regex.gsub(/\(\?\#\[:backreference:([^\\]+?):\]\)/) do
|
720
725
|
match_reference = Regexp.last_match(1)
|
721
726
|
if references[match_reference].nil?
|
722
|
-
|
727
|
+
if $ruby_grammar_builder__unit_test_active
|
728
|
+
"(?#would_be_backref_but_null_because_unit_test)A(?<=B)"
|
729
|
+
else
|
730
|
+
raise "groups:#{groups}\nreferences: #{references}\nWhen processing the matchResultOf:#{match_reference}, I couldn't find the group it was referencing"
|
731
|
+
end
|
732
|
+
else
|
733
|
+
# if the reference does exist, then replace it with its number
|
734
|
+
"(?:\\#{references[match_reference]})"
|
723
735
|
end
|
724
|
-
|
725
|
-
# if the reference does exist, then replace it with it's number
|
726
|
-
"(?:\\#{references[match_reference]})"
|
727
736
|
end
|
728
737
|
|
729
738
|
# check for a subroutine to the Nth group, replace it with `\N`
|
730
739
|
self_regex = self_regex.gsub(/\(\?\#\[:subroutine:([^\\]+?):\]\)/) do
|
731
740
|
match_reference = Regexp.last_match(1)
|
732
741
|
if references[match_reference].nil?
|
733
|
-
|
742
|
+
if $ruby_grammar_builder__unit_test_active
|
743
|
+
"(?#would_be_subroutine_but_null_because_unit_test)A(?<=B)"
|
744
|
+
else
|
745
|
+
raise "groups:#{groups}\nreferences: #{references}\nWhen processing the recursivelyMatch:#{match_reference}, I couldn't find the group it was referencing"
|
746
|
+
end
|
747
|
+
else
|
748
|
+
# if the reference does exist, then replace it with its number
|
749
|
+
"\\g<#{references[match_reference]}>"
|
734
750
|
end
|
735
751
|
|
736
|
-
# if the reference does exist, then replace it with it's number
|
737
|
-
"\\g<#{references[match_reference]}>"
|
738
752
|
end
|
739
753
|
# rubocop:enable Metrics/LineLength
|
740
754
|
self_regex
|
@@ -43,10 +43,6 @@ class RepeatablePattern < PatternBase
|
|
43
43
|
# canonize dont_back_track? and as_few_as_possible?
|
44
44
|
@arguments[:dont_back_track?] ||= @arguments[:possessive?]
|
45
45
|
@arguments[:as_few_as_possible?] ||= @arguments[:lazy?]
|
46
|
-
if @arguments[:greedy?]
|
47
|
-
@arguments[:dont_back_track?] = false
|
48
|
-
@arguments[:as_few_as_possible?] = false
|
49
|
-
end
|
50
46
|
# extract the data
|
51
47
|
at_least = attributes_clone[:at_least]
|
52
48
|
at_most = attributes_clone[:at_most]
|
@@ -91,7 +87,7 @@ class RepeatablePattern < PatternBase
|
|
91
87
|
|
92
88
|
# by default assume no quantifiers
|
93
89
|
quantifier = ""
|
94
|
-
# if there is no at_least, at_most, or how_many_times
|
90
|
+
# if there is no at_least, at_most, or how_many_times?, then there's no quantifier
|
95
91
|
if @at_least.nil? and @at_most.nil?
|
96
92
|
quantifier = ""
|
97
93
|
# if there is a quantifier
|
@@ -156,7 +152,13 @@ class RepeatablePattern < PatternBase
|
|
156
152
|
|
157
153
|
# (see PatternBase#do_evaluate_self)
|
158
154
|
def do_evaluate_self(groups)
|
159
|
-
|
155
|
+
match = add_quantifier_options_to(@match, groups)
|
156
|
+
if self.needs_to_capture?
|
157
|
+
match = "(#{match})"
|
158
|
+
elsif not string_single_entity?(match)
|
159
|
+
match = "(?:#{match})"
|
160
|
+
end
|
161
|
+
return match
|
160
162
|
end
|
161
163
|
|
162
164
|
# controls whether @arguments[:at_most] et al. set @at_most et al.
|
@@ -177,7 +179,7 @@ class RepeatablePattern < PatternBase
|
|
177
179
|
if quantifying_allowed?
|
178
180
|
output += ",\n#{indent} at_least: " + @arguments[:at_least].to_s if @arguments[:at_least]
|
179
181
|
output += ",\n#{indent} at_most: " + @arguments[:at_most].to_s if @arguments[:at_most]
|
180
|
-
output += ",\n#{indent} how_many_times
|
182
|
+
output += ",\n#{indent} how_many_times?: " + @arguments[:how_many_times?].to_s if @arguments[:how_many_times?]
|
181
183
|
output += ",\n#{indent} word_cannot_be_any_of: " + @arguments[:word_cannot_be_any_of].to_s if @arguments[:word_cannot_be_any_of]
|
182
184
|
end
|
183
185
|
output += ",\n#{indent} dont_back_track?: " + @arguments[:dont_back_track?].to_s if @arguments[:dont_back_track?]
|
@@ -5,20 +5,25 @@
|
|
5
5
|
#
|
6
6
|
class ResolvePlaceholders < GrammarTransform
|
7
7
|
def pre_transform(pattern, options)
|
8
|
+
# skip past anything that isn't a pattern
|
8
9
|
return pattern unless pattern.is_a? PatternBase
|
9
10
|
pattern_copy = pattern.__deep_clone__
|
11
|
+
# recursively fill in all of the placeholders by looking them up
|
10
12
|
pattern_copy.map!(true) do |each_pattern_like|
|
11
13
|
|
12
14
|
arguments = each_pattern_like.arguments
|
13
15
|
repository = options[:repository]
|
14
|
-
|
16
|
+
name_of_placeholder = arguments[:placeholder]
|
17
|
+
#
|
18
|
+
# PlaceholderPattern
|
19
|
+
#
|
15
20
|
if each_pattern_like.is_a?(PlaceholderPattern)
|
16
|
-
|
17
|
-
unless repository[
|
18
|
-
raise ":#{
|
21
|
+
# error if can't find the thing the placeholder is referring to
|
22
|
+
unless repository[name_of_placeholder].is_a? PatternBase
|
23
|
+
raise ":#{name_of_placeholder} is not a pattern and cannot be substituted"
|
19
24
|
end
|
20
|
-
|
21
|
-
each_pattern_like.match = repository[
|
25
|
+
# if the pattern exists though, make the substitution
|
26
|
+
each_pattern_like.match = repository[name_of_placeholder].__deep_clone__
|
22
27
|
#
|
23
28
|
# token pattern
|
24
29
|
#
|
@@ -50,72 +55,4 @@ end
|
|
50
55
|
|
51
56
|
# resolving placeholders has no dependencies and makes analyzing patterns much nicer
|
52
57
|
# so it happens fairly early
|
53
|
-
Grammar.register_transform(ResolvePlaceholders.new, 0)
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
# # frozen_string_literal: true
|
60
|
-
|
61
|
-
# #
|
62
|
-
# # Resolves any embedded placeholders
|
63
|
-
# #
|
64
|
-
# class ResolvePlaceholders < GrammarTransform
|
65
|
-
# def pre_transform(pattern, options)
|
66
|
-
# # skip past anything that isn't a pattern
|
67
|
-
# return pattern unless pattern.is_a? PatternBase
|
68
|
-
|
69
|
-
# pattern_copy = pattern.__deep_clone__
|
70
|
-
# # recursively fill in all of the placeholders by looking them up
|
71
|
-
# repository = options[:repository]
|
72
|
-
# pattern_copy.map!(true) do |each_pattern_like|
|
73
|
-
# arguments = each_pattern_like.arguments
|
74
|
-
# name_of_placeholder = arguments[:placeholder]
|
75
|
-
# #
|
76
|
-
# # placeholder pattern
|
77
|
-
# #
|
78
|
-
# if each_pattern_like.is_a?(PlaceholderPattern)
|
79
|
-
# # error if can't find thing the placeholder is reffering to
|
80
|
-
# if !repository[name_of_placeholder].is_a?(PatternBase)
|
81
|
-
# raise "\n#{arguments[:placeholder]} is not a pattern and cannot be substituted"
|
82
|
-
# end
|
83
|
-
|
84
|
-
# # if the pattern exists though, make the substitution
|
85
|
-
# arguments = { match:repository[arguments[:placeholder]].__deep_clone__ }
|
86
|
-
# for each_key, each_value in each_pattern_like.arguments
|
87
|
-
# arguments[each_key] = each_value
|
88
|
-
# end
|
89
|
-
# each_pattern_like = Pattern.new(arguments)
|
90
|
-
# #
|
91
|
-
# # token pattern
|
92
|
-
# #
|
93
|
-
# elsif each_pattern_like.is_a?(TokenPattern)
|
94
|
-
# qualifying_patterns = []
|
95
|
-
# for each_key, each_value in repository
|
96
|
-
# next unless each_value.is_a?(PatternBase)
|
97
|
-
# qualifying_patterns << each_value if arguments[:pattern_filter][each_value]
|
98
|
-
# end
|
99
|
-
# if qualifying_patterns.size == 0
|
100
|
-
# raise <<-HEREDOC.remove_indent
|
101
|
-
|
102
|
-
|
103
|
-
# When creating a token filter #{arguments[:pattern_filter]}
|
104
|
-
# all the patterns that are in the grammar repository were searched
|
105
|
-
# but none of thier adjective lists matched the token filter
|
106
|
-
# HEREDOC
|
107
|
-
# end
|
108
|
-
|
109
|
-
|
110
|
-
# # change this pattern right before the grammar is generated
|
111
|
-
# each_pattern_like.match = oneOf(qualifying_patterns)
|
112
|
-
# end
|
113
|
-
# each_pattern_like
|
114
|
-
# end
|
115
|
-
# return pattern_copy
|
116
|
-
# end
|
117
|
-
# end
|
118
|
-
|
119
|
-
# # resolving placeholders has no dependencies and makes analyzing patterns much nicer
|
120
|
-
# # so it happens fairly early
|
121
|
-
# Grammar.register_transform(ResolvePlaceholders.new, 0)
|
58
|
+
Grammar.register_transform(ResolvePlaceholders.new, 0)
|
@@ -52,21 +52,23 @@ end
|
|
52
52
|
# @return [Boolean] if the string represents an single regex entity
|
53
53
|
def string_single_entity?(regex_string)
|
54
54
|
normal_char = '[a-zA-Z0-9_\-@&%#\'"<>=\/\.,`~\s;:!]'
|
55
|
+
escape_sequence = '\\\\[\w\W]'
|
56
|
+
character_class_that_doesnt_contain_bracket = '\[[^\]]*\]'
|
55
57
|
# normal char
|
56
58
|
if regex_string =~ /^#{normal_char}$/
|
57
59
|
return true
|
58
60
|
end
|
59
61
|
# escape sequence (all are valid, even stuff like \@ ("\\@") or "\\" + "\n" )
|
60
|
-
if regex_string =~
|
62
|
+
if regex_string =~ /^#{escape_sequence}$/
|
61
63
|
return true
|
62
64
|
end
|
63
65
|
# character class that doesn't contain ]
|
64
|
-
if regex_string =~
|
66
|
+
if regex_string =~ /^#{character_class_that_doesnt_contain_bracket}$/
|
65
67
|
return true
|
66
68
|
end
|
67
69
|
|
68
70
|
# fail if more than one of any of the above
|
69
|
-
if regex_string =~ /^(#{normal_char}
|
71
|
+
if regex_string =~ /^(#{normal_char}|#{escape_sequence}|#{character_class_that_doesnt_contain_bracket}){2,}$/
|
70
72
|
return false
|
71
73
|
end
|
72
74
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_grammar_builder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.10
|
4
|
+
version: 1.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Hykin
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-02
|
12
|
+
date: 2023-06-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: deep_clone
|