ruby_grammar_builder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/lib/textmate_grammar/generated/grammar.rb +32 -0
  4. data/lib/textmate_grammar/generated/rule.rb +144 -0
  5. data/lib/textmate_grammar/grammar.rb +670 -0
  6. data/lib/textmate_grammar/grammar_plugin.rb +189 -0
  7. data/lib/textmate_grammar/import_patterns.rb +14 -0
  8. data/lib/textmate_grammar/linters/flat_includes.rb +32 -0
  9. data/lib/textmate_grammar/linters/includes_then_tag_as.rb +48 -0
  10. data/lib/textmate_grammar/linters/standard_naming.rb +226 -0
  11. data/lib/textmate_grammar/linters/start_match_empty.rb +49 -0
  12. data/lib/textmate_grammar/linters/tests.rb +19 -0
  13. data/lib/textmate_grammar/linters/unused_unresolved.rb +9 -0
  14. data/lib/textmate_grammar/pattern_extensions/look_ahead_for.rb +32 -0
  15. data/lib/textmate_grammar/pattern_extensions/look_ahead_to_avoid.rb +31 -0
  16. data/lib/textmate_grammar/pattern_extensions/look_behind_for.rb +31 -0
  17. data/lib/textmate_grammar/pattern_extensions/look_behind_to_avoid.rb +31 -0
  18. data/lib/textmate_grammar/pattern_extensions/lookaround_pattern.rb +169 -0
  19. data/lib/textmate_grammar/pattern_extensions/match_result_of.rb +67 -0
  20. data/lib/textmate_grammar/pattern_extensions/maybe.rb +50 -0
  21. data/lib/textmate_grammar/pattern_extensions/one_of.rb +107 -0
  22. data/lib/textmate_grammar/pattern_extensions/one_or_more_of.rb +42 -0
  23. data/lib/textmate_grammar/pattern_extensions/or_pattern.rb +55 -0
  24. data/lib/textmate_grammar/pattern_extensions/placeholder.rb +102 -0
  25. data/lib/textmate_grammar/pattern_extensions/recursively_match.rb +76 -0
  26. data/lib/textmate_grammar/pattern_extensions/zero_or_more_of.rb +50 -0
  27. data/lib/textmate_grammar/pattern_variations/base_pattern.rb +870 -0
  28. data/lib/textmate_grammar/pattern_variations/legacy_pattern.rb +61 -0
  29. data/lib/textmate_grammar/pattern_variations/pattern.rb +9 -0
  30. data/lib/textmate_grammar/pattern_variations/pattern_range.rb +233 -0
  31. data/lib/textmate_grammar/pattern_variations/repeatable_pattern.rb +204 -0
  32. data/lib/textmate_grammar/regex_operator.rb +182 -0
  33. data/lib/textmate_grammar/regex_operators/alternation.rb +24 -0
  34. data/lib/textmate_grammar/regex_operators/concat.rb +23 -0
  35. data/lib/textmate_grammar/stdlib/common.rb +20 -0
  36. data/lib/textmate_grammar/tokens.rb +110 -0
  37. data/lib/textmate_grammar/transforms/add_ending.rb +25 -0
  38. data/lib/textmate_grammar/transforms/bailout.rb +92 -0
  39. data/lib/textmate_grammar/transforms/fix_repeated_tag_as.rb +75 -0
  40. data/lib/textmate_grammar/transforms/resolve_placeholders.rb +121 -0
  41. data/lib/textmate_grammar/util.rb +198 -0
  42. data/lib/textmate_grammar.rb +4 -0
  43. metadata +85 -0
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
#
# Split arr in two
# Walks arr from left to right and splits it at the first element for which
# the block returns a truthy value.
# This means that the block returned a falsy value for every element in the
# left half, and the right half (when not empty) starts with the first element
# the block accepted.
# The order of elements is not changed.
#
# @param [Array] arr The array to break
#
# @yield [RegexOperator,String] the element to check
# @yieldreturn [Boolean] true when arr should be split in front of the element
#
# @return [Array<(Array,Array)>] The two halves, the right half is empty when
#   the block never returned a truthy value
#
def break_left(arr)
  split_at = arr.index { |elem| yield elem } || arr.length
  [arr.take(split_at), arr.drop(split_at)]
end
23
+
24
#
# (@see break_left)
# Walks the array from right to left, splitting at the first element (seen
# from the right) for which the block returns a truthy value.
# That element becomes the last element of the left half; the right half
# holds only elements the block rejected.
#
# @param [Array] arr The array to break
#
# @yield [RegexOperator,String] the element to check
#
# @return [Array<(Array,Array)>] The two halves, the left half is empty when
#   the block never returned a truthy value
#
def break_right(arr)
  last_match = arr.rindex { |elem| yield elem }
  split_at = last_match.nil? ? 0 : last_match + 1
  [arr.take(split_at), arr.drop(split_at)]
end
34
+
35
#
# RegexOperator is used to provide complicated combining behavior that is not possible
# to implement in PatternBase#do_evaluate_self
#
# Each PatternBase when evaluated produces a RegexOperator and a regexstring
# RegexOperator::evaluate takes that array and produces a single regexstring
#
class RegexOperator
  # @return [number] The precedence of the operator, lower numbers are processed earlier
  attr_accessor :precedence
  # @return [:left, :right] is the operator left or right associative
  #   when two operators share a precedence, {RegexOperator.evaluate} processes
  #   the :left associative one before the :right associative one
  attr_accessor :association

  #
  # Evaluate the array of RegexStrings and RegexOperators
  #
  # Every non-String element is treated as an operator. Operators are visited
  # in order of precedence (lowest first) and each one rewrites the parse array
  # through #do_evaluate_self until a single string is left.
  #
  # @param [Array<String,RegexOperator>] arr the array to evaluate
  #
  # @return [String] The evaluated string
  #
  # @raise [RuntimeError] if the array does not reduce to exactly one element
  #
  def self.evaluate(arr)
    # grab all operators and sort by precedence and association
    operators = arr.reject { |v| v.is_a?(String) }.sort do |a, b|
      next a.precedence - b.precedence unless a.precedence == b.precedence
      next 0 if a.association == b.association

      a.association == :left ? -1 : 1
    end

    operators.each do |op|
      # an operator may already have been consumed by an earlier fold, in that
      # case it is no longer found in arr and is skipped
      if op.association == :right
        before, after = break_right(arr) { |elem| elem == op }
        next if before.empty?

        # the operator itself is the last element of `before`
        arr = op.do_evaluate_self(before[0..-2], after)
      else
        before, after = break_left(arr) { |elem| elem == op }
        next if after.empty?

        # the operator itself is the first element of `after`
        arr = op.do_evaluate_self(before, after[1..-1])
      end
    end

    raise "evaluate did not result in a length of 1" unless arr.length == 1

    arr.first
  end

  #
  # Evaluates this operator against the parse array that surrounds it
  #
  # @param [Array<RegexOperator,String>] arr_left the parse array to the left of self
  # @param [Array<RegexOperator,String>] arr_right the parse array to the right of self
  #
  # @abstract override to provide evaluate the operator
  #
  # @return [Array<RegexOperator,String>] the parse array as a result of evaluating self
  #
  # @note arr_left and arr_right contain the entire parse array use {#fold_left} and
  #   {#fold_right} to collect only the portions that this operator is responsible for
  #
  def do_evaluate_self(arr_left, arr_right) # rubocop:disable Lint/UnusedMethodArgument
    raise NotImplementedError
  end

  #
  # Compares for equality
  #
  # Two operators are equal when they are instances of the exact same class
  # and share precedence and association.
  #
  # @param [RegexOperator] other the operator to compare to
  #
  # @return [Boolean] are they equal
  #
  def ==(other)
    other.instance_of?(self.class) &&
      @precedence == other.precedence &&
      @association == other.association
  end

  #
  # Collects the tokens at the end of arr that belong to this operator and
  # evaluates them to a single string
  #
  # @param [Array<String,RegexOperator>] arr the array to fold
  #
  # @return [Array<(String,Array<String,RegexOperator>)>] the folded array and leftovers
  # @note the leftover portion is not suitable for evaluation
  #   (it begins or ends with a RegexOperator or is an empty string)
  #
  # @raise [RuntimeError] if the collected tokens do not start and end with a String
  #
  def fold_left(arr)
    # go left until:
    # - the precedence of self is greater than the token being tested
    # - the precedence is the same and the association of self is :left
    fold = arr.reverse.take_while { |token| folds_over?(token, :left) }.reverse

    unless fold.first.is_a?(String) && fold.last.is_a?(String)
      raise "fold_left generated an invalid fold expression"
    end

    # peel the elements that are a part of fold off the end of arr
    [RegexOperator.evaluate(fold), arr.take(arr.length - fold.length)]
  end

  #
  # Collects the tokens at the start of arr that belong to this operator and
  # evaluates them to a single string
  #
  # @param [Array<String,RegexOperator>] arr the array to fold
  #
  # @return [Array<(String,Array<String,RegexOperator>)>] the folded array and leftovers
  # @note the leftover portion is not suitable for evaluation
  #   (it begins or ends with a RegexOperator or is an empty string)
  #
  # @raise [RuntimeError] if the collected tokens do not start and end with a String
  #
  def fold_right(arr)
    # go right until:
    # - the precedence of self is greater than the token being tested
    # - the precedence is the same and the association of self is :right
    fold = arr.take_while { |token| folds_over?(token, :right) }

    unless fold.first.is_a?(String) && fold.last.is_a?(String)
      raise "fold_right generated an invalid fold expression"
    end

    [RegexOperator.evaluate(fold), arr.drop(fold.length)]
  end

  private

  #
  # Decides whether a fold that started at self may continue past token
  #
  # @param [String,RegexOperator] token the token being tested
  # @param [:left, :right] blocking_association the association of self that
  #   stops the fold at an operator of equal precedence
  #
  # @return [Boolean] true if token is a part of the fold
  #
  def folds_over?(token, blocking_association)
    return true if token.is_a?(String)
    return token.precedence > @precedence unless token.precedence == @precedence

    @association != blocking_association
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../regex_operator'
4
+
5
#
# Provides alternation as described by OrPattern
#
class AlternationOperator < RegexOperator
  def initialize
    @precedence = 1
    @association = :right
  end

  #
  # Joins everything this operator owns on its left with the single string
  # immediately to its right into one non-capturing alternation group
  #
  # @param [Array<RegexOperator,String>] arr_left the parse array to the left of self
  # @param [Array<RegexOperator,String>] arr_right the parse array to the right of self
  #
  # @return [Array<RegexOperator,String>] the parse array as a result of evaluating self
  #
  # @raise [RuntimeError] if the element immediately to the right is not a String
  #
  def do_evaluate_self(arr_left, arr_right)
    left_string, left_rest = fold_left(arr_left)

    # fold right is not applied as only the immediate right is a part of the alternation
    right_operand = arr_right.first
    unless right_operand.is_a?(String)
      # without this check a missing operand would be interpolated as an empty
      # alternative and a nil would leak into the parse array
      raise "AlternationOperator requires a regex string immediately to its right"
    end

    # the operands are not individually wrapped in (?:...) as alternation has the
    # lowest precedence (in regex) that could be generated (anything lower is
    # required to be closed); only the alternation as a whole is grouped
    self_string = "(?:#{left_string}|#{right_operand})"

    [left_rest, self_string, arr_right.drop(1)].flatten
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../regex_operator'
4
+
5
#
# The standard RegexOperator, provides concatenation
#
class ConcatOperator < RegexOperator
  def initialize
    @association = :left
    @precedence = 2
  end

  #
  # Folds both neighbours of this operator and joins the two resulting strings
  #
  # (see RegexOperator#do_evaluate_self)
  #
  def do_evaluate_self(arr_left, arr_right)
    left_string, left_rest = fold_left(arr_left)
    right_string, right_rest = fold_right(arr_right)

    [left_rest, left_string + right_string, right_rest].flatten
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
# single characters and runs of one or more of them
@space = Pattern.new(/\s/)
@spaces = oneOrMoreOf(@space)
@digit = Pattern.new(/\d/)
@digits = oneOrMoreOf(@digit)
@standard_character = Pattern.new(/\w/)
@word = oneOrMoreOf(@standard_character)

# zero-width boundaries
@word_boundary = Pattern.new(/\b/)
# between a whitespace character and a following non-whitespace character
@white_space_start_boundary = lookBehindFor(/\s/).lookAheadFor(/\S/)
# between a non-whitespace character and a following whitespace character
@white_space_end_boundary = lookBehindFor(/\S/).lookAheadFor(/\s/)

# document and line anchors
@start_of_document = Pattern.new(/\A/)
@end_of_document = Pattern.new(/\Z/)
@start_of_line = Pattern.new(/^/)
# either a literal newline or the end-of-line anchor
@end_of_line = oneOf([Pattern.new(/\n/), Pattern.new(/$/)])
@@ -0,0 +1,110 @@
1
+ require_relative "./pattern_variations/base_pattern"
2
+ require_relative "./pattern_extensions/placeholder"
3
+
4
# A PatternBase subclass that adds no behavior of its own.
# It takes advantage of the placeholder system, since a token pattern is just
# a dynamic form of a placeholder.
class TokenPattern < PatternBase; end
7
+
8
class Grammar
  #
  # Create a TokenPattern that selects patterns by their adjectives
  #
  # The returned pattern matches nothing by itself (its match is an empty regex
  # comment); it carries the filter built by #parseTokenSyntax in its
  # :pattern_filter argument.
  #
  # @param [Regexp] token_pattern A value that uses the tokenParsing syntax
  #   (see #parseTokenSyntax)
  #
  # @return [TokenPattern]
  #
  # @raise [RuntimeError] if token_pattern is not valid tokenParsing syntax
  #
  def tokenMatching(token_pattern)
    # create the normal pattern that will act as a placeholder until the very end
    # and tell it what it needs to select later
    TokenPattern.new({
      match: /(?#tokens)/,
      pattern_filter: parseTokenSyntax(token_pattern),
    })
  end

  #
  # convert a regex value into a proc filter used to select patterns
  #
  # @param [Regexp] argument A value that uses the tokenParsing syntax (explained below)
  #
  # @note The syntax for tokenParsing is simple, there are:
  #   - `adjectives` ex: isAClass
  #   - the `not` operator ex: !isAClass
  #   - the `or` operator ex: isAClass || isAPrimitive
  #   - the `and` operator ex: isAClass && isAPrimitive
  #   - parentheses ex: (!isAClass) && isAPrimitive
  #   _
  #   anything matching /[a-zA-Z0-9_]+/ is considered an "adjective"
  #   whitespace, including newlines, are removed/ignored
  #   all other characters are invalid
  #   _
  #   using only an adjective, ex: /isAClass/ means to only include
  #   Patterns that have that adjective in their adjective list
  #
  # @return [proc] a function that accepts a Pattern as input, and returns
  #   a boolean of whether or not that pattern should be included
  #   (nil when the input is not a PatternBase with an Array of adjectives)
  #
  # @raise [RuntimeError] if argument is not a Regexp, contains invalid
  #   characters, or has two adjectives separated only by whitespace
  #
  def parseTokenSyntax(argument)
    # validate input type
    unless argument.is_a?(Regexp)
      raise <<~HEREDOC


        Trying to call parseTokenSyntax() but the argument isn't Regexp its #{argument.class}
        value: #{argument}
      HEREDOC
    end

    # Regexp#source is the text between the //'s; unlike slicing #inspect it is
    # not polluted by a trailing "/" when the regex carries flags (ex: /.../x)
    regex_content = argument.source

    # remove all invalid characters, make sure length didn't change
    invalid_characters_removed = regex_content.gsub(/[^a-zA-Z0-9_&|\(\)! \n]/, "")
    if invalid_characters_removed.length != regex_content.length
      raise <<~HEREDOC


        It appears the tokenSyntax #{argument.inspect} contains some invalid characters
        with invalid characters: #{regex_content.inspect}
        without invalid characters: #{invalid_characters_removed.inspect}
      HEREDOC
    end

    # find broken syntax
    adjacent_adjectives = regex_content[/[a-zA-Z0-9_]+\s+[a-zA-Z0-9_]+/]
    if adjacent_adjectives
      raise <<~HEREDOC

        Inside a tokenSyntax: #{argument.inspect}
        this part of the syntax is invalid: #{adjacent_adjectives.inspect}
        (theres a space between two adjectives)
        My guess is that it was half-edited
        or an accidental space was added
      HEREDOC
    end

    # convert all adjectives into inclusion checks
    filter_source = regex_content
      .gsub(/\s+/, " ")
      .gsub(/[a-zA-Z0-9_]+/, 'pattern.arguments[:adjectives].include?(:\0)')

    # convert it into a proc
    # NOTE(review): this relies on eval; the string can only contain the
    # characters that passed the whitelist above plus the generated inclusion
    # checks, keep that validation in place if this method is changed
    lambda do |pattern|
      eval(filter_source) if pattern.is_a?(PatternBase) && pattern.arguments[:adjectives].is_a?(Array)
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
#
# Adds the last portion of the scope name to each tag_as if not already present
#
class AddEnding < GrammarTransform
  #
  # adds the ending to any tag_as in pattern if needed
  #
  # @param [PatternBase, Array, Object] pattern the value to transform; arrays
  #   are transformed element by element, anything that is not a PatternBase is
  #   returned untouched
  # @param [Hash] options must provide the grammar under :grammar, its
  #   scope_name supplies the ending
  #
  # @return the result of PatternBase#transform_tag_as for patterns, the mapped
  #   array for arrays, otherwise the value that was passed in
  #
  def pre_transform(pattern, options)
    return pattern.map { |v| pre_transform(v, options) } if pattern.is_a? Array
    return pattern unless pattern.is_a? PatternBase

    ending = options[:grammar].scope_name.split(".")[-1]
    suffix = ".#{ending}"
    pattern.transform_tag_as do |tag_as|
      tag_as.split(" ").map do |tag|
        # only a whole trailing scope segment counts as "already present";
        # a bare end_with?(ending) would also accept ex: "storage.modifier.static"
        # for the ending "c"
        next tag if tag == ending || tag.end_with?(suffix)

        tag + suffix
      end.join(" ")
    end
  end
end

Grammar.register_transform(AddEnding.new)
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
#
# Produces a prefixed copy of the rules of a grammar whose "end" and "while"
# regexes additionally stop at a bailout pattern
#
class BailoutTransform < GrammarTransform
  #
  # @param [String] prefix prepended (followed by "_") to the names of the
  #   rewritten repository rules
  # @param pattern the bailout pattern, it is handed to lookAheadFor and
  #   lookAheadToAvoid
  #
  def initialize(prefix, pattern)
    @prefix = prefix
    @end_bailout = lookAheadFor(pattern).to_r
    @while_bailout = Pattern.new(/^/).maybe(/\s+/).lookAheadToAvoid(pattern).to_r
  end

  #
  # Rewrites rule (in place) so that it uses the bailout variants
  #
  # Rules with a "match" key are returned untouched. Otherwise "includes" is
  # pointed at the prefixed rule, the bailout alternatives are appended to
  # "end" and "while", and nested patterns, repository entries and captures
  # are rewritten recursively.
  #
  # @param [Array] non_duplicate the names that are left untouched
  # @param [Hash] rule the rule to rewrite
  #
  # @return [Hash] rule
  #
  def rewrite_rule(non_duplicate, rule)
    return rule if rule["match"]

    if rule["includes"] && !non_duplicate.include?(rule["includes"])
      # [1..-1] drops the first character of the old include before prefixing
      rule["includes"] = "##{@prefix}_#{rule['includes'][1..-1]}"
    end

    rule["end"] = "#{rule['end']}|#{@end_bailout}" if rule["end"]
    rule["while"] = "#{rule['while']}|(?:#{@while_bailout})" if rule["while"]

    rule["patterns"]&.map! { |pat| rewrite_rule(non_duplicate, pat) }

    if rule[:repository]
      rule[:repository] = rule[:repository].map do |key, pat|
        next [key, pat] if non_duplicate.include? key

        ["#{@prefix}_#{key}", rewrite_rule(non_duplicate, pat)]
      end.to_h
    end

    # rewrite_rule mutates the hash it is given, so visiting the values is enough
    %w[captures beginCaptures endCaptures whileCaptures].each do |captures_key|
      rule[captures_key]&.each_value { |pat| rewrite_rule(non_duplicate, pat) }
    end

    rule
  end

  #
  # Collects the names of the repository rules that need no bailout copy
  #
  # A rule qualifies when it has a "match" key, or when "patterns" is its only
  # key and every nested pattern qualifies.
  #
  # @param [Hash] rule the rule to inspect
  # @param [String, Symbol, nil] repository_name the repository key of rule, if any
  #
  # @return [Array, nil] the collected names (empty for a qualifying rule that
  #   has no name), nil when rule does not qualify and has no repository
  #
  def collect_non_duplicate(rule, repository_name = nil)
    own_name = repository_name ? [repository_name] : []
    return own_name if rule["match"]

    if rule["patterns"] && rule.length == 1
      # stops at the first nested pattern that does not qualify
      return own_name if rule["patterns"].all? { |pat| collect_non_duplicate(pat) }
    end

    return nil unless rule[:repository]

    rule[:repository].each_with_object([]) do |(key, pat), names|
      nested_names = collect_non_duplicate(pat, key)
      names.concat(nested_names) if nested_names
    end
  end

  #
  # Adds the rewritten rules to the grammar
  #
  # @param [Hash] grammar_hash the generated grammar
  #
  # @return [Hash] a deep clone of grammar_hash merged with the rewritten copy;
  #   the merge is shallow, top level keys of the rewritten copy take precedence
  #
  def post_transform(grammar_hash)
    non_duplicate = collect_non_duplicate(grammar_hash) || []
    duplicate = rewrite_rule(non_duplicate, grammar_hash.__deep_clone__)

    grammar_hash.__deep_clone__.merge(duplicate)
  end
end