textmate_grammar 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/lib/textmate_grammar/generated/grammar.rb +32 -0
  4. data/lib/textmate_grammar/generated/rule.rb +144 -0
  5. data/lib/textmate_grammar/grammar.rb +670 -0
  6. data/lib/textmate_grammar/grammar_plugin.rb +189 -0
  7. data/lib/textmate_grammar/import_patterns.rb +14 -0
  8. data/lib/textmate_grammar/linters/flat_includes.rb +32 -0
  9. data/lib/textmate_grammar/linters/includes_then_tag_as.rb +48 -0
  10. data/lib/textmate_grammar/linters/standard_naming.rb +226 -0
  11. data/lib/textmate_grammar/linters/start_match_empty.rb +49 -0
  12. data/lib/textmate_grammar/linters/tests.rb +19 -0
  13. data/lib/textmate_grammar/linters/unused_unresolved.rb +9 -0
  14. data/lib/textmate_grammar/pattern_extensions/look_ahead_for.rb +32 -0
  15. data/lib/textmate_grammar/pattern_extensions/look_ahead_to_avoid.rb +31 -0
  16. data/lib/textmate_grammar/pattern_extensions/look_behind_for.rb +31 -0
  17. data/lib/textmate_grammar/pattern_extensions/look_behind_to_avoid.rb +31 -0
  18. data/lib/textmate_grammar/pattern_extensions/lookaround_pattern.rb +169 -0
  19. data/lib/textmate_grammar/pattern_extensions/match_result_of.rb +67 -0
  20. data/lib/textmate_grammar/pattern_extensions/maybe.rb +50 -0
  21. data/lib/textmate_grammar/pattern_extensions/one_of.rb +107 -0
  22. data/lib/textmate_grammar/pattern_extensions/one_or_more_of.rb +42 -0
  23. data/lib/textmate_grammar/pattern_extensions/or_pattern.rb +55 -0
  24. data/lib/textmate_grammar/pattern_extensions/placeholder.rb +102 -0
  25. data/lib/textmate_grammar/pattern_extensions/recursively_match.rb +76 -0
  26. data/lib/textmate_grammar/pattern_extensions/zero_or_more_of.rb +50 -0
  27. data/lib/textmate_grammar/pattern_variations/base_pattern.rb +870 -0
  28. data/lib/textmate_grammar/pattern_variations/legacy_pattern.rb +61 -0
  29. data/lib/textmate_grammar/pattern_variations/pattern.rb +9 -0
  30. data/lib/textmate_grammar/pattern_variations/pattern_range.rb +233 -0
  31. data/lib/textmate_grammar/pattern_variations/repeatable_pattern.rb +204 -0
  32. data/lib/textmate_grammar/regex_operator.rb +182 -0
  33. data/lib/textmate_grammar/regex_operators/alternation.rb +24 -0
  34. data/lib/textmate_grammar/regex_operators/concat.rb +23 -0
  35. data/lib/textmate_grammar/stdlib/common.rb +20 -0
  36. data/lib/textmate_grammar/tokens.rb +110 -0
  37. data/lib/textmate_grammar/transforms/add_ending.rb +25 -0
  38. data/lib/textmate_grammar/transforms/bailout.rb +92 -0
  39. data/lib/textmate_grammar/transforms/fix_repeated_tag_as.rb +75 -0
  40. data/lib/textmate_grammar/transforms/resolve_placeholders.rb +121 -0
  41. data/lib/textmate_grammar/util.rb +198 -0
  42. data/lib/textmate_grammar.rb +4 -0
  43. metadata +85 -0
data/lib/textmate_grammar/regex_operator.rb
@@ -0,0 +1,182 @@
+ # frozen_string_literal: true
+
+ #
+ # Split arr in two
+ # Walks arr from left to right and splits it at the first element for which the
+ # block returns true
+ # this means that the block returned false for every element in the left half
+ # and true for the first element of the right half
+ # the order of elements is not changed
+ #
+ # @param [Array] arr The array to break
+ #
+ # @yield [RegexOperator,String] the element to check
+ #
+ # @return [Array<(Array,Array)>] The two halves
+ #
+ def break_left(arr)
+     left = arr.take_while do |elem|
+         next !(yield elem)
+     end
+     [left, arr[(left.length)..-1]]
+ end
+
+ #
+ # (@see break_left)
+ # Walks the array from right to left, splitting after the last element for which the block returns true
+ #
+ def break_right(arr)
+     right = arr.reverse.take_while do |elem|
+         next !(yield elem)
+     end.reverse
+     [arr[0..-(right.length+1)], right]
+ end
+
+ #
+ # RegexOperator is used to provide complicated combining behavior that is not possible
+ # to implement in PatternBase#do_evaluate_self
+ #
+ # Each PatternBase, when evaluated, produces a RegexOperator and a regex string
+ # RegexOperator::evaluate takes that array and produces a single regex string
+ #
+ class RegexOperator
+     # @return [Integer] The precedence of the operator, lower numbers are processed earlier
+     attr_accessor :precedence
+     # @return [:left, :right] is the operator left or right associative
+     #   right associative is processed first
+     attr_accessor :association
+
+     #
+     # Evaluate the array of regex strings and RegexOperators
+     #
+     # @param [Array<String,RegexOperator>] arr the array to evaluate
+     #
+     # @return [String] The evaluated string
+     #
+     def self.evaluate(arr)
+         # grab all operators and sort by precedence and association
+         ops = arr.reject { |v| v.is_a?(String) }.sort do |a, b|
+             if a.precedence == b.precedence
+                 next 0 if a.association == b.association
+                 next -1 if a.association == :left
+
+                 next 1
+             end
+
+             a.precedence - b.precedence
+         end
+
+         ops.each do |op|
+             # TODO: consolidate left and right paths
+             split = []
+             if op.association == :right
+                 elems = break_right(arr) { |elem| elem == op }
+                 next if elems[0].empty?
+
+                 split = [elems[0][0..-2], elems[1]]
+             else
+                 elems = break_left(arr) { |elem| elem == op }
+                 next if elems[1].empty?
+
+                 split = [elems[0], elems[1][1..-1]]
+             end
+             arr = op.do_evaluate_self(split[0], split[1])
+         end
+         if arr.length != 1
+             puts "evaluate did not result in a length of 1"
+             raise "see above error"
+         end
+
+         arr.first
+     end
+
+     #
+     # Evaluates this operator against the parse arrays on either side of it
+     #
+     # @param [Array<RegexOperator,String>] arr_left the parse array to the left of self
+     # @param [Array<RegexOperator,String>] arr_right the parse array to the right of self
+     #
+     # @abstract override to evaluate the operator
+     #
+     # @return [Array<RegexOperator,String>] the parse array as a result of evaluating self
+     #
+     # @note arr_left and arr_right contain the entire parse array; use {#fold_left} and
+     #   {#fold_right} to collect only the portions that this operator is responsible for
+     #
+     def do_evaluate_self(arr_left, arr_right) # rubocop:disable Lint/UnusedMethodArgument
+         raise NotImplementedError
+     end
+
+     #
+     # Compares for equality
+     #
+     # @param [RegexOperator] other the operator to compare to
+     #
+     # @return [Boolean] are they equal
+     #
+     def ==(other)
+         return false unless other.instance_of?(self.class)
+         return false unless @precedence == other.precedence
+         return false unless @association == other.association
+
+         true
+     end
+
+     #
+     # Folds the portion of arr adjacent to self (the right end of arr) into a single string
+     #
+     # @param [Array<String,RegexOperator>] arr the array to fold
+     #
+     # @return [Array<(String,Array<String,RegexOperator>)>] the folded string and the leftovers
+     # @note the leftover portion is not suitable for evaluation
+     #   (it begins or ends with a RegexOperator or is empty)
+     #
+     def fold_left(arr)
+         # go left until:
+         # - the precedence of self is greater than the token being tested
+         # - the precedence is the same and the association of self is :left
+         fold = (arr.reverse.take_while do |t|
+             next true if t.is_a? String
+             next true if t.precedence > @precedence
+
+             next false if t.precedence < @precedence
+             next false if @association == :left
+
+             true
+         end).reverse
+
+         if fold.empty? || !fold[0].is_a?(String) || !fold[-1].is_a?(String)
+             puts "fold_left generated an invalid fold expression"
+             raise "see above error"
+         end
+
+         # [0..-(fold.length+1)] peels the elements that are a part of fold off the end
+         # of arr
+         [RegexOperator.evaluate(fold), arr[0..-(fold.length+1)]]
+     end
+
+     #
+     # (see #fold_left)
+     #
+     def fold_right(arr)
+         # go right until:
+         # - the precedence of self is greater than the token being tested
+         # - the precedence is the same and the association of self is :right
+         fold = arr.take_while do |t|
+             next true if t.is_a? String
+             next true if t.precedence > @precedence
+
+             next false if t.precedence < @precedence
+             next false if @association == :right
+
+             true
+         end
+
+         if fold.empty? || !fold[0].is_a?(String) || !fold[-1].is_a?(String)
+             puts "fold_right generated an invalid fold expression"
+             raise "see above error"
+         end
+
+         [RegexOperator.evaluate(fold), arr[(fold.length)..-1]]
+     end
+ end
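
To make the splitting behavior above concrete, here is a small plain-Ruby illustration (not part of the package) of how break_left and break_right divide an array around a matching element:

    arr = ["a", :op, "b", "c"]

    left, right = break_left(arr) { |elem| elem == :op }
    # left  => ["a"]             the block returned false for everything before :op
    # right => [:op, "b", "c"]   the right half begins with the matched element

    left, right = break_right(arr) { |elem| elem == :op }
    # left  => ["a", :op]        the left half ends with the matched element
    # right => ["b", "c"]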
data/lib/textmate_grammar/regex_operators/alternation.rb
@@ -0,0 +1,24 @@
+ # frozen_string_literal: true
+
+ require_relative '../regex_operator'
+
+ #
+ # Provides alternation as described by OrPattern
+ #
+ class AlternationOperator < RegexOperator
+     def initialize
+         @precedence = 1
+         @association = :right
+     end
+
+     # (see RegexOperator#do_evaluate_self)
+     def do_evaluate_self(arr_left, arr_right)
+         left = fold_left(arr_left)
+         # fold_right is not applied as only the immediate right is a part of the alternation
+         # (?:#{foo}) is not needed as alternation has the lowest precedence (in regex)
+         # that could be generated (anything lower is required to be closed)
+         self_string = "(?:#{left[0]}|#{arr_right[0]})"
+
+         [left[1], self_string, arr_right[1..-1]].flatten
+     end
+ end
data/lib/textmate_grammar/regex_operators/concat.rb
@@ -0,0 +1,23 @@
+ # frozen_string_literal: true
+
+ require_relative '../regex_operator'
+
+ #
+ # The standard RegexOperator, provides concatenation
+ #
+ class ConcatOperator < RegexOperator
+     def initialize
+         @precedence = 2
+         @association = :left
+     end
+
+     # (see RegexOperator#do_evaluate_self)
+     def do_evaluate_self(arr_left, arr_right)
+         left = fold_left(arr_left)
+         right = fold_right(arr_right)
+
+         self_string = left[0] + right[0]
+
+         [left[1], self_string, right[1]].flatten
+     end
+ end
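
As a rough sketch of how the two operators combine (assuming regex_operator.rb, alternation.rb, and concat.rb above are loaded), RegexOperator.evaluate folds a mixed array of strings and operators into a single regex string:

    parse_array = ["foo", ConcatOperator.new, "bar", AlternationOperator.new, "baz"]
    RegexOperator.evaluate(parse_array)
    # => "(?:foobar|baz)"
    # the AlternationOperator (precedence 1) is processed first; its fold_left call
    # evaluates the ConcatOperator, so "foo" and "bar" collapse to "foobar" before
    # being wrapped in the alternation with "baz"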
data/lib/textmate_grammar/stdlib/common.rb
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ @space = Pattern.new(/\s/)
+ @spaces = oneOrMoreOf(@space)
+ @digit = Pattern.new(/\d/)
+ @digits = oneOrMoreOf(@digit)
+ @standard_character = Pattern.new(/\w/)
+ @word = oneOrMoreOf(@standard_character)
+ @word_boundary = Pattern.new(/\b/)
+ @white_space_start_boundary = lookBehindFor(/\s/).lookAheadFor(/\S/)
+ @white_space_end_boundary = lookBehindFor(/\S/).lookAheadFor(/\s/)
+ @start_of_document = Pattern.new(/\A/)
+ @end_of_document = Pattern.new(/\Z/)
+ @start_of_line = Pattern.new(/^/)
+ @end_of_line = oneOf(
+     [
+         Pattern.new(/\n/),
+         Pattern.new(/$/),
+     ],
+ )
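
As one hedged example of how these stdlib patterns compose with the chaining methods seen elsewhere in this diff (the rule below is invented for illustration and is not part of the gem):

    # a line containing only whitespace: start of line, optional spaces, lookahead for end of line
    @blank_line = Pattern.new(/^/).maybe(@spaces).lookAheadFor(/$/)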
data/lib/textmate_grammar/tokens.rb
@@ -0,0 +1,110 @@
+ require_relative "./pattern_variations/base_pattern"
+ require_relative "./pattern_extensions/placeholder"
+
+ # Take advantage of the placeholder system since this is just a dynamic form of a placeholder
+ class TokenPattern < PatternBase
+ end
+
+ class Grammar
+     #
+     # convert a regex value into a placeholder pattern used to select other patterns
+     #
+     # @param [Regexp] token_pattern A value that uses the tokenParsing syntax (explained below)
+     #
+     # @note The syntax for tokenParsing is simple, there are:
+     #   - `adjectives` ex: isAClass
+     #   - the `not` operator ex: !isAClass
+     #   - the `or` operator ex: isAClass || isAPrimitive
+     #   - the `and` operator ex: isAClass && isAPrimitive
+     #   - parentheses ex: (!isAClass) && isAPrimitive
+     #   _
+     #   anything matching /[a-zA-Z0-9_]+/ is considered an "adjective"
+     #   whitespace, including newlines, is removed/ignored
+     #   all other characters are invalid
+     #   _
+     #   using only an adjective, ex: /isAClass/ means to only include
+     #   Patterns that have that adjective in their adjective list
+     #
+     # @return [TokenPattern]
+     #
+     def tokenMatching(token_pattern)
+         # create the normal pattern that will act as a placeholder until the very end
+         token_pattern = TokenPattern.new({
+             match: /(?#tokens)/,
+             pattern_filter: parseTokenSyntax(token_pattern),
+         })
+         # the pattern_filter tells it what it needs to select later
+         return token_pattern
+     end
+
+     #
+     # convert a regex value into a proc filter used to select patterns
+     #
+     # @param [Regexp] argument A value that uses the tokenParsing syntax (explained below)
+     #
+     # @note The syntax for tokenParsing is simple, there are:
+     #   - `adjectives` ex: isAClass
+     #   - the `not` operator ex: !isAClass
+     #   - the `or` operator ex: isAClass || isAPrimitive
+     #   - the `and` operator ex: isAClass && isAPrimitive
+     #   - parentheses ex: (!isAClass) && isAPrimitive
+     #   _
+     #   anything matching /[a-zA-Z0-9_]+/ is considered an "adjective"
+     #   whitespace, including newlines, is removed/ignored
+     #   all other characters are invalid
+     #   _
+     #   using only an adjective, ex: /isAClass/ means to only include
+     #   Patterns that have that adjective in their adjective list
+     #
+     # @return [Proc] a function that accepts a Pattern as input, and returns
+     #   a Boolean of whether or not that pattern should
+     #   be included
+     #
+
+     def parseTokenSyntax(argument)
+         # validate input type
+         if !argument.is_a?(Regexp)
+             raise <<~HEREDOC
+
+
+                 Trying to call parseTokenSyntax() but the argument isn't a Regexp, it's a #{argument.class}
+                 value: #{argument}
+             HEREDOC
+         end
+         # just remove the //'s from the string
+         regex_content = argument.inspect[1...-1]
+
+         # remove all invalid characters, make sure length didn't change
+         invalid_characters_removed = regex_content.gsub(/[^a-zA-Z0-9_&|\(\)! \n]/, "")
+         if invalid_characters_removed.length != regex_content.length
+             raise <<~HEREDOC
+
+
+                 It appears the tokenSyntax #{argument.inspect} contains some invalid characters
+                 with invalid characters: #{regex_content.inspect}
+                 without invalid characters: #{invalid_characters_removed.inspect}
+             HEREDOC
+         end
+
+         # find broken syntax
+         if regex_content =~ /[a-zA-Z0-9_]+\s+[a-zA-Z0-9_]+/
+             raise <<~HEREDOC
+
+                 Inside a tokenSyntax: #{argument.inspect}
+                 this part of the syntax is invalid: #{$&.inspect}
+                 (there's a space between two adjectives)
+                 My guess is that it was half-edited
+                 or an accidental space was added
+             HEREDOC
+         end
+
+         # convert all adjectives into inclusion checks
+         regex_content.gsub!(/\s+/, " ")
+         regex_content.gsub!(/[a-zA-Z0-9_]+/, 'pattern.arguments[:adjectives].include?(:\0)')
+         # convert it into a proc
+         return ->(pattern) do
+             puts "regex_content is: #{regex_content} "
+             eval(regex_content) if pattern.is_a?(PatternBase) && pattern.arguments[:adjectives].is_a?(Array)
+         end
+     end
+ end
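
A worked example of the adjective syntax may help. Given /isAClass && !isAPrimitive/, the two gsub! calls rewrite the regex body into an inclusion check that is evaluated once per pattern, roughly equivalent to:

    # what parseTokenSyntax builds for /isAClass && !isAPrimitive/ (illustrative;
    # the real proc also guards on pattern.is_a?(PatternBase) and the adjectives array)
    ->(pattern) do
        pattern.arguments[:adjectives].include?(:isAClass) &&
            !pattern.arguments[:adjectives].include?(:isAPrimitive)
    end

    # hypothetical usage inside a grammar definition (the grammar[:rule] = pattern
    # form is assumed from the rest of the gem, not shown in this diff)
    grammar[:type_name] = grammar.tokenMatching(/isAClass && !isAPrimitive/)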
data/lib/textmate_grammar/transforms/add_ending.rb
@@ -0,0 +1,25 @@
+ # frozen_string_literal: true
+
+ #
+ # Adds the last portion of the scope name to each tag_as if not already present
+ #
+ class AddEnding < GrammarTransform
+     #
+     # adds the ending to any tag_as in pattern if needed
+     #
+     def pre_transform(pattern, options)
+         return pattern.map { |v| pre_transform(v, options) } if pattern.is_a? Array
+         return pattern unless pattern.is_a? PatternBase
+
+         ending = options[:grammar].scope_name.split(".")[-1]
+         pattern.transform_tag_as do |tag_as|
+             tag_as.split(" ").map do |tag|
+                 next tag if tag.end_with?(ending)
+
+                 tag + "." + ending
+             end.join(" ")
+         end
+     end
+ end
+
+ Grammar.register_transform(AddEnding.new)
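
For clarity, a before/after sketch of what AddEnding does, using plain Ruby that mirrors the transform body and assuming a grammar whose scope_name is "source.cpp" (so the ending is "cpp"):

    ending = "cpp"                      # from scope_name "source.cpp"
    tag_as = "keyword.control punctuation.section"
    tag_as.split(" ").map { |tag| tag.end_with?(ending) ? tag : tag + "." + ending }.join(" ")
    # => "keyword.control.cpp punctuation.section.cpp"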
data/lib/textmate_grammar/transforms/bailout.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ class BailoutTransform < GrammarTransform
+     def initialize(prefix, pattern)
+         @prefix = prefix
+         @end_bailout = lookAheadFor(pattern).to_r
+         @while_bailout = Pattern.new(/^/).maybe(/\s+/).lookAheadToAvoid(pattern).to_r
+     end
+
+     def rewrite_rule(non_duplicate, rule)
+         return rule if rule["match"]
+
+         if rule["includes"] && !non_duplicate.include?(rule["includes"])
+             rule["includes"] = "##{@prefix}_#{rule['includes'][1..-1]}"
+         end
+
+         rule["end"] = "#{rule['end']}|#{@end_bailout}" if rule["end"]
+         rule["while"] = "#{rule['while']}|(?:#{@while_bailout})" if rule["while"]
+
+         rule["patterns"]&.map! { |pat| rewrite_rule(non_duplicate, pat) }
+
+         if rule[:repository]
+             rule[:repository] = Hash[
+                 rule[:repository].map do |key, pat|
+                     next [key, pat] if non_duplicate.include? key
+
+                     [
+                         "#{@prefix}_#{key}",
+                         rewrite_rule(non_duplicate, pat),
+                     ]
+                 end
+             ]
+         end
+
+         rule["captures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+         rule["beginCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+         rule["endCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+         rule["whileCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+
+         rule
+     end
+
+     def collect_non_duplicate(rule, repository_name = nil)
+         if rule["match"]
+             return [repository_name] if repository_name
+
+             return []
+         end
+
+         if rule["patterns"]
+             if rule.length == 1
+                 non_duplicate = rule["patterns"].reduce([]) do |memo, pat|
+                     next memo if memo.nil?
+
+                     non_duplicate_nested = collect_non_duplicate(pat)
+                     next nil if non_duplicate_nested.nil?
+
+                     next memo.concat(non_duplicate_nested)
+                 end
+
+                 unless non_duplicate.nil?
+                     return [repository_name] if repository_name
+
+                     return []
+                 end
+             end
+         end
+
+         if rule[:repository]
+             return rule[:repository].keys.reduce([]) do |memo, key|
+                 non_duplicate = collect_non_duplicate(rule[:repository][key], key)
+                 memo.concat(non_duplicate) if non_duplicate
+
+                 next memo
+             end
+         end
+         nil
+     end
+
+     def post_transform(grammar_hash)
+         non_duplicate = collect_non_duplicate(grammar_hash)
+         pp non_duplicate
+         duplicate = rewrite_rule(
+             (non_duplicate.nil? ? [] : non_duplicate),
+             grammar_hash.__deep_clone__,
+         )
+
+         pp grammar_hash[:repository]
+
+         grammar_hash.__deep_clone__.merge(duplicate)
+     end
+ end
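
Reading the code above, BailoutTransform appears to duplicate non-trivial repository rules under a prefixed name and append a bailout lookahead to their end/while matches so the duplicated ranges terminate at the bailout. A hypothetical registration, mirroring the AddEnding example (the prefix and the bailout pattern here are made up):

    # stop duplicated rules at any line that starts a new top-level definition
    Grammar.register_transform(BailoutTransform.new("bailout", /^(?=\S)/))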