ruby_grammar_builder 1.1.10 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ruby_grammar_builder/pattern_extensions/or_pattern.rb +54 -0
- data/lib/ruby_grammar_builder/pattern_variations/base_pattern.rb +41 -27
- data/lib/ruby_grammar_builder/pattern_variations/repeatable_pattern.rb +9 -7
- data/lib/ruby_grammar_builder/transforms/resolve_placeholders.rb +12 -75
- data/lib/ruby_grammar_builder/util.rb +5 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 11a8a1b576e0b4aa298d5216fab27ecafbdfb89bdbcbe1604699b17f944e6773
|
|
4
|
+
data.tar.gz: 705f94bbb89097d41510042c581bab138bfbcb74bd703e540df429524547c6fd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e3121cb4e1efddf789f32c6c79a77ebd35bee9417ba654f33d6d99b4672ebffc09d4f54254826477f44ac75b38ff06c3efab0a1e73c73e56627021bc7c8e4411
|
|
7
|
+
data.tar.gz: 20de057ae05b377087bcbe77e3d42ddd12069640538e98a0507d05d90ffe75c2092be09820d9b7341327294ff86d4671896aeea21be4007d59954dee76196a45
|
|
@@ -14,7 +14,61 @@ class OrPattern < PatternBase
|
|
|
14
14
|
def evaluate_operator
|
|
15
15
|
AlternationOperator.new
|
|
16
16
|
end
|
|
17
|
+
|
|
18
|
+
def run_self_tests
|
|
19
|
+
pass = [true]
|
|
17
20
|
|
|
21
|
+
# some patterns are not able to be evaluated
|
|
22
|
+
# do not attempt to unless required
|
|
23
|
+
return true unless [
|
|
24
|
+
:should_fully_match,
|
|
25
|
+
:should_not_fully_match,
|
|
26
|
+
:should_partially_match,
|
|
27
|
+
:should_not_partially_match,
|
|
28
|
+
].any? { |k| @arguments.include? k }
|
|
29
|
+
|
|
30
|
+
copy = @match.__deep_clone_self__
|
|
31
|
+
test_regex = copy.to_r
|
|
32
|
+
test_fully_regex = wrap_with_anchors(copy).to_r
|
|
33
|
+
|
|
34
|
+
warn = lambda do |symbol|
|
|
35
|
+
puts [
|
|
36
|
+
"",
|
|
37
|
+
"When testing the pattern #{test_regex.inspect}. The unit test for #{symbol} failed.",
|
|
38
|
+
"The unit test has the following patterns:",
|
|
39
|
+
"#{@arguments[symbol].to_yaml}",
|
|
40
|
+
"The Failing pattern is below:",
|
|
41
|
+
"#{self}",
|
|
42
|
+
].join("\n")
|
|
43
|
+
end
|
|
44
|
+
if @arguments[:should_fully_match].is_a? Array
|
|
45
|
+
unless @arguments[:should_fully_match].all? { |test| test =~ test_fully_regex }
|
|
46
|
+
warn.call :should_fully_match
|
|
47
|
+
pass << false
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
if @arguments[:should_not_fully_match].is_a? Array
|
|
51
|
+
unless @arguments[:should_not_fully_match].none? { |test| test =~ test_fully_regex }
|
|
52
|
+
warn.call :should_not_fully_match
|
|
53
|
+
pass << false
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
if @arguments[:should_partially_match].is_a? Array
|
|
57
|
+
unless @arguments[:should_partially_match].all? { |test| test =~ test_regex }
|
|
58
|
+
warn.call :should_partially_match
|
|
59
|
+
pass << false
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
if @arguments[:should_not_partially_match].is_a? Array
|
|
63
|
+
unless @arguments[:should_not_partially_match].none? { |test| test =~ test_regex }
|
|
64
|
+
warn.call :should_not_partially_match
|
|
65
|
+
pass << false
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
pass.none?(&:!)
|
|
70
|
+
end
|
|
71
|
+
|
|
18
72
|
#
|
|
19
73
|
# Raises an error to prevent use as initial type
|
|
20
74
|
#
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
|
|
2
|
+
$ruby_grammar_builder__unit_test_active = false
|
|
3
3
|
#
|
|
4
4
|
# Provides a base class to simplify the writing of complex regular expressions rules
|
|
5
5
|
# This class completely handles capture numbers and provides convenience methods for
|
|
@@ -42,7 +42,7 @@ class PatternBase
|
|
|
42
42
|
# @return [Boolean] can this capture become capture group 0
|
|
43
43
|
#
|
|
44
44
|
def optimize_outer_group?
|
|
45
|
-
needs_to_capture? and @next_pattern.nil?
|
|
45
|
+
self.needs_to_capture? and @next_pattern.nil?
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
#
|
|
@@ -73,18 +73,6 @@ class PatternBase
|
|
|
73
73
|
new_pattern.insert!(pattern).freeze
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
-
#
|
|
77
|
-
# Adds a capture group if needed
|
|
78
|
-
#
|
|
79
|
-
# @param [String] regex_as_string the pattern as a string
|
|
80
|
-
#
|
|
81
|
-
# @return [String] the pattern, potentially with a capture group
|
|
82
|
-
#
|
|
83
|
-
def add_capture_group_if_needed(regex_as_string)
|
|
84
|
-
regex_as_string = "(#{regex_as_string})" if needs_to_capture?
|
|
85
|
-
regex_as_string
|
|
86
|
-
end
|
|
87
|
-
|
|
88
76
|
#
|
|
89
77
|
# Uses a block to transform all Patterns in the list
|
|
90
78
|
#
|
|
@@ -306,7 +294,7 @@ class PatternBase
|
|
|
306
294
|
#
|
|
307
295
|
def to_tag
|
|
308
296
|
output = {
|
|
309
|
-
match: evaluate,
|
|
297
|
+
match: self.evaluate(),
|
|
310
298
|
}
|
|
311
299
|
|
|
312
300
|
output[:captures] = convert_group_attributes_to_captures(collect_group_attributes)
|
|
@@ -415,6 +403,8 @@ class PatternBase
|
|
|
415
403
|
# @return [Boolean] If all test passed return true, otherwise false
|
|
416
404
|
#
|
|
417
405
|
def run_tests
|
|
406
|
+
original_flag_value = $ruby_grammar_builder__unit_test_active
|
|
407
|
+
$ruby_grammar_builder__unit_test_active = true
|
|
418
408
|
pass = [
|
|
419
409
|
run_self_tests,
|
|
420
410
|
]
|
|
@@ -427,6 +417,7 @@ class PatternBase
|
|
|
427
417
|
elsif @arguments[:includes].is_a? PatternBase
|
|
428
418
|
pass << @arguments[:includes].run_tests
|
|
429
419
|
end
|
|
420
|
+
$ruby_grammar_builder__unit_test_active = original_flag_value
|
|
430
421
|
pass.none?(&:!)
|
|
431
422
|
end
|
|
432
423
|
|
|
@@ -448,8 +439,17 @@ class PatternBase
|
|
|
448
439
|
].any? { |k| @arguments.include? k }
|
|
449
440
|
|
|
450
441
|
copy = __deep_clone_self__
|
|
451
|
-
|
|
452
|
-
|
|
442
|
+
begin
|
|
443
|
+
test_regex = copy.to_r
|
|
444
|
+
test_fully_regex = wrap_with_anchors(copy).to_r
|
|
445
|
+
rescue => exception
|
|
446
|
+
raise <<~HEREDOC
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
error running unit tests for: #{copy}
|
|
450
|
+
#{exception}
|
|
451
|
+
HEREDOC
|
|
452
|
+
end
|
|
453
453
|
|
|
454
454
|
warn = lambda do |symbol|
|
|
455
455
|
puts [
|
|
@@ -558,7 +558,12 @@ class PatternBase
|
|
|
558
558
|
def do_evaluate_self(groups)
|
|
559
559
|
match = @match
|
|
560
560
|
match = match.evaluate(groups) if match.is_a? PatternBase
|
|
561
|
-
|
|
561
|
+
if self.needs_to_capture?
|
|
562
|
+
match = "(#{match})"
|
|
563
|
+
elsif not string_single_entity?(match)
|
|
564
|
+
match = "(?:#{match})"
|
|
565
|
+
end
|
|
566
|
+
return match
|
|
562
567
|
end
|
|
563
568
|
|
|
564
569
|
#
|
|
@@ -599,7 +604,7 @@ class PatternBase
|
|
|
599
604
|
|
|
600
605
|
# (see string_single_entity?)
|
|
601
606
|
def single_entity?
|
|
602
|
-
string_single_entity? evaluate
|
|
607
|
+
return string_single_entity?( self.evaluate() )
|
|
603
608
|
end
|
|
604
609
|
|
|
605
610
|
# does this pattern contain no capturing groups
|
|
@@ -684,7 +689,7 @@ class PatternBase
|
|
|
684
689
|
#
|
|
685
690
|
def do_collect_self_groups(next_group)
|
|
686
691
|
groups = []
|
|
687
|
-
groups << {group: next_group}.merge(@arguments) if needs_to_capture?
|
|
692
|
+
groups << {group: next_group}.merge(@arguments) if self.needs_to_capture?
|
|
688
693
|
groups
|
|
689
694
|
end
|
|
690
695
|
|
|
@@ -719,22 +724,31 @@ class PatternBase
|
|
|
719
724
|
self_regex = self_regex.gsub(/\(\?\#\[:backreference:([^\\]+?):\]\)/) do
|
|
720
725
|
match_reference = Regexp.last_match(1)
|
|
721
726
|
if references[match_reference].nil?
|
|
722
|
-
|
|
727
|
+
if $ruby_grammar_builder__unit_test_active
|
|
728
|
+
"(?#would_be_backref_but_null_because_unit_test)A(?<=B)"
|
|
729
|
+
else
|
|
730
|
+
raise "groups:#{groups}\nreferences: #{references}\nWhen processing the matchResultOf:#{match_reference}, I couldn't find the group it was referencing"
|
|
731
|
+
end
|
|
732
|
+
else
|
|
733
|
+
# if the reference does exist, then replace it with its number
|
|
734
|
+
"(?:\\#{references[match_reference]})"
|
|
723
735
|
end
|
|
724
|
-
|
|
725
|
-
# if the reference does exist, then replace it with it's number
|
|
726
|
-
"(?:\\#{references[match_reference]})"
|
|
727
736
|
end
|
|
728
737
|
|
|
729
738
|
# check for a subroutine to the Nth group, replace it with `\g<N>`
|
|
730
739
|
self_regex = self_regex.gsub(/\(\?\#\[:subroutine:([^\\]+?):\]\)/) do
|
|
731
740
|
match_reference = Regexp.last_match(1)
|
|
732
741
|
if references[match_reference].nil?
|
|
733
|
-
|
|
742
|
+
if $ruby_grammar_builder__unit_test_active
|
|
743
|
+
"(?#would_be_subroutine_but_null_because_unit_test)A(?<=B)"
|
|
744
|
+
else
|
|
745
|
+
raise "groups:#{groups}\nreferences: #{references}\nWhen processing the recursivelyMatch:#{match_reference}, I couldn't find the group it was referencing"
|
|
746
|
+
end
|
|
747
|
+
else
|
|
748
|
+
# if the reference does exist, then replace it with its number
|
|
749
|
+
"\\g<#{references[match_reference]}>"
|
|
734
750
|
end
|
|
735
751
|
|
|
736
|
-
# if the reference does exist, then replace it with it's number
|
|
737
|
-
"\\g<#{references[match_reference]}>"
|
|
738
752
|
end
|
|
739
753
|
# rubocop:enable Metrics/LineLength
|
|
740
754
|
self_regex
|
|
@@ -43,10 +43,6 @@ class RepeatablePattern < PatternBase
|
|
|
43
43
|
# canonize dont_back_track? and as_few_as_possible?
|
|
44
44
|
@arguments[:dont_back_track?] ||= @arguments[:possessive?]
|
|
45
45
|
@arguments[:as_few_as_possible?] ||= @arguments[:lazy?]
|
|
46
|
-
if @arguments[:greedy?]
|
|
47
|
-
@arguments[:dont_back_track?] = false
|
|
48
|
-
@arguments[:as_few_as_possible?] = false
|
|
49
|
-
end
|
|
50
46
|
# extract the data
|
|
51
47
|
at_least = attributes_clone[:at_least]
|
|
52
48
|
at_most = attributes_clone[:at_most]
|
|
@@ -91,7 +87,7 @@ class RepeatablePattern < PatternBase
|
|
|
91
87
|
|
|
92
88
|
# by default assume no quantifiers
|
|
93
89
|
quantifier = ""
|
|
94
|
-
# if there is no at_least, at_most, or how_many_times
|
|
90
|
+
# if there is no at_least, at_most, or how_many_times?, then there's no quantifier
|
|
95
91
|
if @at_least.nil? and @at_most.nil?
|
|
96
92
|
quantifier = ""
|
|
97
93
|
# if there is a quantifier
|
|
@@ -156,7 +152,13 @@ class RepeatablePattern < PatternBase
|
|
|
156
152
|
|
|
157
153
|
# (see PatternBase#do_evaluate_self)
|
|
158
154
|
def do_evaluate_self(groups)
|
|
159
|
-
|
|
155
|
+
match = add_quantifier_options_to(@match, groups)
|
|
156
|
+
if self.needs_to_capture?
|
|
157
|
+
match = "(#{match})"
|
|
158
|
+
elsif not string_single_entity?(match)
|
|
159
|
+
match = "(?:#{match})"
|
|
160
|
+
end
|
|
161
|
+
return match
|
|
160
162
|
end
|
|
161
163
|
|
|
162
164
|
# controls whether @arguments[:at_most] et al. set @at_most et al.
|
|
@@ -177,7 +179,7 @@ class RepeatablePattern < PatternBase
|
|
|
177
179
|
if quantifying_allowed?
|
|
178
180
|
output += ",\n#{indent} at_least: " + @arguments[:at_least].to_s if @arguments[:at_least]
|
|
179
181
|
output += ",\n#{indent} at_most: " + @arguments[:at_most].to_s if @arguments[:at_most]
|
|
180
|
-
output += ",\n#{indent} how_many_times
|
|
182
|
+
output += ",\n#{indent} how_many_times?: " + @arguments[:how_many_times?].to_s if @arguments[:how_many_times?]
|
|
181
183
|
output += ",\n#{indent} word_cannot_be_any_of: " + @arguments[:word_cannot_be_any_of].to_s if @arguments[:word_cannot_be_any_of]
|
|
182
184
|
end
|
|
183
185
|
output += ",\n#{indent} dont_back_track?: " + @arguments[:dont_back_track?].to_s if @arguments[:dont_back_track?]
|
|
@@ -5,20 +5,25 @@
|
|
|
5
5
|
#
|
|
6
6
|
class ResolvePlaceholders < GrammarTransform
|
|
7
7
|
def pre_transform(pattern, options)
|
|
8
|
+
# skip past anything that isn't a pattern
|
|
8
9
|
return pattern unless pattern.is_a? PatternBase
|
|
9
10
|
pattern_copy = pattern.__deep_clone__
|
|
11
|
+
# recursively fill in all of the placeholders by looking them up
|
|
10
12
|
pattern_copy.map!(true) do |each_pattern_like|
|
|
11
13
|
|
|
12
14
|
arguments = each_pattern_like.arguments
|
|
13
15
|
repository = options[:repository]
|
|
14
|
-
|
|
16
|
+
name_of_placeholder = arguments[:placeholder]
|
|
17
|
+
#
|
|
18
|
+
# PlaceholderPattern
|
|
19
|
+
#
|
|
15
20
|
if each_pattern_like.is_a?(PlaceholderPattern)
|
|
16
|
-
|
|
17
|
-
unless repository[
|
|
18
|
-
raise ":#{
|
|
21
|
+
# error if the thing the placeholder is referring to can't be found
|
|
22
|
+
unless repository[name_of_placeholder].is_a? PatternBase
|
|
23
|
+
raise ":#{name_of_placeholder} is not a pattern and cannot be substituted"
|
|
19
24
|
end
|
|
20
|
-
|
|
21
|
-
each_pattern_like.match = repository[
|
|
25
|
+
# if the pattern exists though, make the substitution
|
|
26
|
+
each_pattern_like.match = repository[name_of_placeholder].__deep_clone__
|
|
22
27
|
#
|
|
23
28
|
# token pattern
|
|
24
29
|
#
|
|
@@ -50,72 +55,4 @@ end
|
|
|
50
55
|
|
|
51
56
|
# resolving placeholders has no dependencies and makes analyzing patterns much nicer
|
|
52
57
|
# so it happens fairly early
|
|
53
|
-
Grammar.register_transform(ResolvePlaceholders.new, 0)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# # frozen_string_literal: true
|
|
60
|
-
|
|
61
|
-
# #
|
|
62
|
-
# # Resolves any embedded placeholders
|
|
63
|
-
# #
|
|
64
|
-
# class ResolvePlaceholders < GrammarTransform
|
|
65
|
-
# def pre_transform(pattern, options)
|
|
66
|
-
# # skip past anything that isn't a pattern
|
|
67
|
-
# return pattern unless pattern.is_a? PatternBase
|
|
68
|
-
|
|
69
|
-
# pattern_copy = pattern.__deep_clone__
|
|
70
|
-
# # recursively fill in all of the placeholders by looking them up
|
|
71
|
-
# repository = options[:repository]
|
|
72
|
-
# pattern_copy.map!(true) do |each_pattern_like|
|
|
73
|
-
# arguments = each_pattern_like.arguments
|
|
74
|
-
# name_of_placeholder = arguments[:placeholder]
|
|
75
|
-
# #
|
|
76
|
-
# # placeholder pattern
|
|
77
|
-
# #
|
|
78
|
-
# if each_pattern_like.is_a?(PlaceholderPattern)
|
|
79
|
-
# # error if can't find thing the placeholder is reffering to
|
|
80
|
-
# if !repository[name_of_placeholder].is_a?(PatternBase)
|
|
81
|
-
# raise "\n#{arguments[:placeholder]} is not a pattern and cannot be substituted"
|
|
82
|
-
# end
|
|
83
|
-
|
|
84
|
-
# # if the pattern exists though, make the substitution
|
|
85
|
-
# arguments = { match:repository[arguments[:placeholder]].__deep_clone__ }
|
|
86
|
-
# for each_key, each_value in each_pattern_like.arguments
|
|
87
|
-
# arguments[each_key] = each_value
|
|
88
|
-
# end
|
|
89
|
-
# each_pattern_like = Pattern.new(arguments)
|
|
90
|
-
# #
|
|
91
|
-
# # token pattern
|
|
92
|
-
# #
|
|
93
|
-
# elsif each_pattern_like.is_a?(TokenPattern)
|
|
94
|
-
# qualifying_patterns = []
|
|
95
|
-
# for each_key, each_value in repository
|
|
96
|
-
# next unless each_value.is_a?(PatternBase)
|
|
97
|
-
# qualifying_patterns << each_value if arguments[:pattern_filter][each_value]
|
|
98
|
-
# end
|
|
99
|
-
# if qualifying_patterns.size == 0
|
|
100
|
-
# raise <<-HEREDOC.remove_indent
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
# When creating a token filter #{arguments[:pattern_filter]}
|
|
104
|
-
# all the patterns that are in the grammar repository were searched
|
|
105
|
-
# but none of thier adjective lists matched the token filter
|
|
106
|
-
# HEREDOC
|
|
107
|
-
# end
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
# # change this pattern right before the grammar is generated
|
|
111
|
-
# each_pattern_like.match = oneOf(qualifying_patterns)
|
|
112
|
-
# end
|
|
113
|
-
# each_pattern_like
|
|
114
|
-
# end
|
|
115
|
-
# return pattern_copy
|
|
116
|
-
# end
|
|
117
|
-
# end
|
|
118
|
-
|
|
119
|
-
# # resolving placeholders has no dependencies and makes analyzing patterns much nicer
|
|
120
|
-
# # so it happens fairly early
|
|
121
|
-
# Grammar.register_transform(ResolvePlaceholders.new, 0)
|
|
58
|
+
Grammar.register_transform(ResolvePlaceholders.new, 0)
|
|
@@ -52,21 +52,23 @@ end
|
|
|
52
52
|
# @return [Boolean] if the string represents a single regex entity
|
|
53
53
|
def string_single_entity?(regex_string)
|
|
54
54
|
normal_char = '[a-zA-Z0-9_\-@&%#\'"<>=\/\.,`~\s;:!]'
|
|
55
|
+
escape_sequence = '\\\\[\w\W]'
|
|
56
|
+
character_class_that_doesnt_contain_bracket = '\[[^\]]*\]'
|
|
55
57
|
# normal char
|
|
56
58
|
if regex_string =~ /^#{normal_char}$/
|
|
57
59
|
return true
|
|
58
60
|
end
|
|
59
61
|
# escape sequence (all are valid, even stuff like \@ ("\\@") or "\\" + "\n" )
|
|
60
|
-
if regex_string =~
|
|
62
|
+
if regex_string =~ /^#{escape_sequence}$/
|
|
61
63
|
return true
|
|
62
64
|
end
|
|
63
65
|
# character class that doesn't contain ]
|
|
64
|
-
if regex_string =~
|
|
66
|
+
if regex_string =~ /^#{character_class_that_doesnt_contain_bracket}$/
|
|
65
67
|
return true
|
|
66
68
|
end
|
|
67
69
|
|
|
68
70
|
# fail if more than one of any of the above
|
|
69
|
-
if regex_string =~ /^(#{normal_char}
|
|
71
|
+
if regex_string =~ /^(#{normal_char}|#{escape_sequence}|#{character_class_that_doesnt_contain_bracket}){2,}$/
|
|
70
72
|
return false
|
|
71
73
|
end
|
|
72
74
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_grammar_builder
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.12
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jeff Hykin
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2023-02
|
|
12
|
+
date: 2023-06-02 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: deep_clone
|