RubyGems - ruby_grammar_builder - Versions diffs - 0.0.1 - Mend

ruby_grammar_builder 0.0.1

Files changed (43) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/lib/textmate_grammar/generated/grammar.rb +32 -0
data/lib/textmate_grammar/generated/rule.rb +144 -0
data/lib/textmate_grammar/grammar.rb +670 -0
data/lib/textmate_grammar/grammar_plugin.rb +189 -0
data/lib/textmate_grammar/import_patterns.rb +14 -0
data/lib/textmate_grammar/linters/flat_includes.rb +32 -0
data/lib/textmate_grammar/linters/includes_then_tag_as.rb +48 -0
data/lib/textmate_grammar/linters/standard_naming.rb +226 -0
data/lib/textmate_grammar/linters/start_match_empty.rb +49 -0
data/lib/textmate_grammar/linters/tests.rb +19 -0
data/lib/textmate_grammar/linters/unused_unresolved.rb +9 -0
data/lib/textmate_grammar/pattern_extensions/look_ahead_for.rb +32 -0
data/lib/textmate_grammar/pattern_extensions/look_ahead_to_avoid.rb +31 -0
data/lib/textmate_grammar/pattern_extensions/look_behind_for.rb +31 -0
data/lib/textmate_grammar/pattern_extensions/look_behind_to_avoid.rb +31 -0
data/lib/textmate_grammar/pattern_extensions/lookaround_pattern.rb +169 -0
data/lib/textmate_grammar/pattern_extensions/match_result_of.rb +67 -0
data/lib/textmate_grammar/pattern_extensions/maybe.rb +50 -0
data/lib/textmate_grammar/pattern_extensions/one_of.rb +107 -0
data/lib/textmate_grammar/pattern_extensions/one_or_more_of.rb +42 -0
data/lib/textmate_grammar/pattern_extensions/or_pattern.rb +55 -0
data/lib/textmate_grammar/pattern_extensions/placeholder.rb +102 -0
data/lib/textmate_grammar/pattern_extensions/recursively_match.rb +76 -0
data/lib/textmate_grammar/pattern_extensions/zero_or_more_of.rb +50 -0
data/lib/textmate_grammar/pattern_variations/base_pattern.rb +870 -0
data/lib/textmate_grammar/pattern_variations/legacy_pattern.rb +61 -0
data/lib/textmate_grammar/pattern_variations/pattern.rb +9 -0
data/lib/textmate_grammar/pattern_variations/pattern_range.rb +233 -0
data/lib/textmate_grammar/pattern_variations/repeatable_pattern.rb +204 -0
data/lib/textmate_grammar/regex_operator.rb +182 -0
data/lib/textmate_grammar/regex_operators/alternation.rb +24 -0
data/lib/textmate_grammar/regex_operators/concat.rb +23 -0
data/lib/textmate_grammar/stdlib/common.rb +20 -0
data/lib/textmate_grammar/tokens.rb +110 -0
data/lib/textmate_grammar/transforms/add_ending.rb +25 -0
data/lib/textmate_grammar/transforms/bailout.rb +92 -0
data/lib/textmate_grammar/transforms/fix_repeated_tag_as.rb +75 -0
data/lib/textmate_grammar/transforms/resolve_placeholders.rb +121 -0
data/lib/textmate_grammar/util.rb +198 -0
data/lib/textmate_grammar.rb +4 -0
metadata +85 -0

data/lib/textmate_grammar/regex_operator.rb ADDED Viewed

@@ -0,0 +1,182 @@
+# frozen_string_literal: true
+#
+# Split arr in two
+# Walks arr from left to right and splits it at the first element for which
+# the block returns true
+# this means that the block returned false for all elements in the left half
+# and true for the first element of the right half
+# the order of elements is not changed
+#
+# @param [Array] arr The array to break
+#
+# @yield [RegexOperator,String] the element to check
+#
+# @return [Array<(Array,Array)>] The two halves
+#
+def break_left(arr)
+    left = arr.take_while do |elem|
+        next !(yield elem)
+    end
+    [left, arr[(left.length)..-1]]
+end
+#
+# (@see break_left)
+# Walks the array from right to left, splitting at the first element for which the block returns true
+#
+def break_right(arr)
+    right = arr.reverse.take_while do |elem|
+        next !(yield elem)
+    end.reverse
+    [arr[0..-(right.length+1)], right]
+end
+#
+# RegexOperator is used to provide complicated combining behavior that is not possible
+# to implement in PatternBase#do_evaluate_self
+#
+# Each PatternBase when evaluated produces a RegexOperator and a regexstring
+# RegexOperator::evaluate takes that array and produces a single regexstring
+#
+class RegexOperator
+    # @return [number] The precedence of the operator, lower numbers are processed earlier
+    attr_accessor :precedence
+    # @return [:left, :right] is the operator left or right associative
+    # at equal precedence, evaluate processes :left operators before :right ones
+    attr_accessor :association
+    #
+    # Evaluate the array of RegexStrings and RegexOperators
+    #
+    # @param [Array<String,RegexOperator>] arr the array to evaluate
+    #
+    # @return [String] The evaluated string
+    #
+    def self.evaluate(arr)
+        # grab all operators and sort by precedence and association
+        ops = arr.reject { |v| v.is_a?(String) }.sort do |a, b|
+            if a.precedence == b.precedence
+                next 0 if a.association == b.association
+                next -1 if a.association == :left
+                next 1
+            end
+            a.precedence - b.precedence
+        end
+        ops.each do |op|
+            # TODO: consolidate left and right paths
+            split = []
+            if op.association == :right
+                elems = break_right(arr) { |elem| elem == op }
+                next if elems[0].empty?
+                split = [elems[0][0..-2], elems[1]]
+            else
+                elems = break_left(arr) { |elem| elem == op }
+                next if elems[1].empty?
+                split = [elems[0], elems[1][1..-1]]
+            end
+            arr = op.do_evaluate_self(split[0], split[1])
+        end
+        if arr.length != 1
+            puts "evaluate did not result in a length of 1"
+            raise "see above error"
+        end
+        arr.first
+    end
+    #
+    # Evaluate this operator using the parse arrays on either side of it
+    #
+    # @param [Array<RegexOperator,String>] arr_left the parse array to the left of self
+    # @param [Array<RegexOperator,String>] arr_right the parse array to the right of self
+    #
+    # @abstract override to evaluate the operator
+    #
+    # @return [Array<RegexOperator,String>] the parse array as a result of evaluating self
+    #
+    # @note arr_left and arr_right contain the entire parse array; use {#fold_left} and
+    #  {#fold_right} to collect only the portions that this operator is responsible for
+    #
+    def do_evaluate_self(arr_left, arr_right) # rubocop:disable Lint/UnusedMethodArgument
+        raise NotImplementedError
+    end
+    #
+    # Compares for equality
+    #
+    # @param [RegexOperator] other the operator to compare to
+    #
+    # @return [Boolean] are they equal
+    #
+    def ==(other)
+        return false unless other.instance_of?(self.class)
+        return false unless @precedence == other.precedence
+        return false unless @association == other.association
+        true
+    end
+    #
+    # Collect the trailing portion of arr that this operator is responsible for and evaluate it
+    #
+    # @param [Array<String,RegexOperator>] arr the array to fold
+    #
+    # @return [Array<(String,Array<String,RegexOperator>)>] the folded array and leftovers
+    # @note the leftover portion is not suitable for evaluation
+    #   (it begins or ends with a RegexOperator or is an empty string)
+    #
+    def fold_left(arr)
+        # go left until:
+        #  - the precedence of self is greater than the token being tested
+        #  - the precedence is the same and the association of self is :left
+        fold = (arr.reverse.take_while do |t|
+            next true if t.is_a? String
+            next true if t.precedence > @precedence
+            next false if t.precedence < @precedence
+            next false if @association == :left
+            true
+        end).reverse
+        if fold.empty? || !fold[0].is_a?(String) || !fold[-1].is_a?(String)
+            puts "fold_left generated an invalid fold expression"
+            raise "see above error"
+        end
+        # [0..-(fold.length+1)] peels the elements that are a part of fold off the end
+        # of arr
+        [RegexOperator.evaluate(fold), arr[0..-(fold.length+1)]]
+    end
+    #
+    # (see #fold_left)
+    #
+    def fold_right(arr)
+        # go right until:
+        #  - the precedence of self is greater than the token being tested
+        #  - the precedence is the same and the association of self is :right
+        fold = arr.take_while do |t|
+            next true if t.is_a? String
+            next true if t.precedence > @precedence
+            next false if t.precedence < @precedence
+            next false if @association == :right
+            true
+        end
+        if fold.empty? || !fold[0].is_a?(String) || !fold[-1].is_a?(String)
+            puts "fold_right generated an invalid fold expression"
+            raise "see above error"
+        end
+        [RegexOperator.evaluate(fold), arr[(fold.length)..-1]]
+    end
+end

data/lib/textmate_grammar/regex_operators/alternation.rb ADDED Viewed

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+require_relative '../regex_operator'
+#
+# Provides alternation as described by OrPattern
+#
+class AlternationOperator < RegexOperator
+    def initialize
+        @precedence = 1
+        @association = :right
+    end
+    # (see RegexOperator#do_evaluate_self)
+    def do_evaluate_self(arr_left, arr_right)
+        left = fold_left(arr_left)
+        # fold right is not applied as only the immediate right is a part of the alternation
+        # (?:#{foo}) is not needed as alternation has the lowest precedence (in regex)
+        # that could be generated (anything lower is required to be closed)
+        self_string = "(?:#{left[0]}|#{arr_right[0]})"
+        [left[1], self_string, arr_right[1..-1]].flatten
+    end
+end

data/lib/textmate_grammar/regex_operators/concat.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require_relative '../regex_operator'
+#
+# The standard RegexOperator, provides concatenation
+#
+class ConcatOperator < RegexOperator
+    def initialize
+        @precedence = 2
+        @association = :left
+    end
+    # (see RegexOperator#do_evaluate_self)
+    def do_evaluate_self(arr_left, arr_right)
+        left = fold_left(arr_left)
+        right = fold_right(arr_right)
+        self_string = left[0]+right[0]
+        [left[1], self_string, right[1]].flatten
+    end
+end

data/lib/textmate_grammar/stdlib/common.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+@space = Pattern.new(/\s/)
+@spaces = oneOrMoreOf(@space)
+@digit = Pattern.new(/\d/)
+@digits = oneOrMoreOf(@digit)
+@standard_character = Pattern.new(/\w/)
+@word = oneOrMoreOf(@standard_character)
+@word_boundary = Pattern.new(/\b/)
+@white_space_start_boundary = lookBehindFor(/\s/).lookAheadFor(/\S/)
+@white_space_end_boundary = lookBehindFor(/\S/).lookAheadFor(/\s/)
+@start_of_document = Pattern.new(/\A/)
+@end_of_document = Pattern.new(/\Z/)
+@start_of_line = Pattern.new(/^/)
+@end_of_line = oneOf(
+    [
+        Pattern.new(/\n/),
+        Pattern.new(/$/),
+    ],
+)

data/lib/textmate_grammar/tokens.rb ADDED Viewed

@@ -0,0 +1,110 @@
+require_relative "./pattern_variations/base_pattern"
+require_relative "./pattern_extensions/placeholder"
+# Take advantage of the placeholder system since this is just a dynamic form of a placeholder
+class TokenPattern < PatternBase
+end
+class Grammar
+    #
+    # create a TokenPattern placeholder whose pattern_filter is built from a regex value
+    #
+    # @param [Regexp] token_pattern A value that uses the tokenParsing syntax (explained below)
+    #
+    # @note The syntax for tokenParsing is simple, there are:
+    #  - `adjectives` ex: isAClass
+    #  - the `not` operator ex: !isAClass
+    #  - the `or` operator ex: isAClass || isAPrimitive
+    #  - the `and` operator ex: isAClass && isAPrimitive
+    #  - parentheses ex: (!isAClass) && isAPrimitive
+    #  _
+    #  anything matching /[a-zA-Z0-9_]+/ is considered an "adjective"
+    #  whitespace, including newlines, are removed/ignored
+    #  all other characters are invalid
+    #  _
+    #  using only an adjective, ex: /isAClass/ means to only include
+    #  Patterns that have that adjective in their adjective list
+    #
+    # @return [TokenPattern]
+    #
+    def tokenMatching(token_pattern)
+        # create the normal pattern that will act as a placeholder until the very end
+        token_pattern = TokenPattern.new({
+            match: /(?#tokens)/,
+            pattern_filter: parseTokenSyntax(token_pattern),
+        })
+        # tell it what it needs to select-later
+        return token_pattern
+    end
+    #
+    # convert a regex value into a proc filter used to select patterns
+    #
+    # @param [Regexp] argument A value that uses the tokenParsing syntax (explained below)
+    #
+    # @note The syntax for tokenParsing is simple, there are:
+    #  - `adjectives` ex: isAClass
+    #  - the `not` operator ex: !isAClass
+    #  - the `or` operator ex: isAClass || isAPrimitive
+    #  - the `and` operator ex: isAClass && isAPrimitive
+    #  - parentheses ex: (!isAClass) && isAPrimitive
+    #  _
+    #  anything matching /[a-zA-Z0-9_]+/ is considered an "adjective"
+    #  whitespace, including newlines, are removed/ignored
+    #  all other characters are invalid
+    #  _
+    #  using only an adjective, ex: /isAClass/ means to only include
+    #  Patterns that have that adjective in their adjective list
+    #
+    # @return [proc] a function that accepts a Pattern as input, and returns
+    #                a boolean of whether or not that pattern should
+    #                be included
+    #
+    def parseTokenSyntax(argument)
+        # validate input type
+        if !argument.is_a?(Regexp)
+            raise <<~HEREDOC
+                Trying to call parseTokenSyntax() but the argument isn't Regexp its #{argument.class}
+                value: #{argument}
+            HEREDOC
+        end
+        # just remove the //'s from the string
+        regex_content = argument.inspect[1...-1]
+        # remove all invalid characters, make sure length didn't change
+        invalid_characters_removed = regex_content.gsub(/[^a-zA-Z0-9_&|\(\)! \n]/, "")
+        if invalid_characters_removed.length != regex_content.length
+            raise <<~HEREDOC
+                It appears the tokenSyntax #{argument.inspect} contains some invalid characters
+                with invalid characters: #{regex_content.inspect}
+                without invalid characters: #{invalid_characters_removed.inspect}
+            HEREDOC
+        end
+        # find broken syntax
+        if regex_content =~ /[a-zA-Z0-9_]+\s+[a-zA-Z0-9_]+/
+            raise <<~HEREDOC
+                Inside a tokenSyntax: #{argument.inspect}
+                this part of the syntax is invalid: #{$&.inspect}
+                (theres a space between two adjectives)
+                My guess is that it was half-edited
+                or an accidental space was added
+            HEREDOC
+        end
+        # convert all adjectives into inclusion checks
+        regex_content.gsub!(/\s+/," ")
+        regex_content.gsub!(/[a-zA-Z0-9_]+/, 'pattern.arguments[:adjectives].include?(:\0)')
+        # convert it into a proc
+        return ->(pattern) do
+            puts "regex_content is: #{regex_content} "
+            eval(regex_content) if pattern.is_a?(PatternBase) && pattern.arguments[:adjectives].is_a?(Array)
+        end
+    end
+end

data/lib/textmate_grammar/transforms/add_ending.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+#
+# Adds the last portion of the scope name to each tag_as if not already present
+#
+class AddEnding < GrammarTransform
+    #
+    # adds the ending to any tag_as in pattern if needed
+    #
+    def pre_transform(pattern, options)
+        return pattern.map { |v| pre_transform(v, options) } if pattern.is_a? Array
+        return pattern unless pattern.is_a? PatternBase
+        ending = options[:grammar].scope_name.split(".")[-1]
+        pattern.transform_tag_as do |tag_as|
+            tag_as.split(" ").map do |tag|
+                next tag if tag.end_with?(ending)
+                tag + "." + ending
+            end.join(" ")
+        end
+    end
+end
+Grammar.register_transform(AddEnding.new)

data/lib/textmate_grammar/transforms/bailout.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+class BailoutTransform < GrammarTransform
+    def initialize(prefix, pattern)
+        @prefix = prefix
+        @end_bailout = lookAheadFor(pattern).to_r
+        @while_bailout = Pattern.new(/^/).maybe(/\s+/).lookAheadToAvoid(pattern).to_r
+    end
+    def rewrite_rule(non_duplicate, rule)
+        return rule if rule["match"]
+        if rule["includes"] && !non_duplicate.include?(rule["includes"])
+            rule["includes"] = "##{@prefix}_#{rule['includes'][1..-1]}"
+        end
+        rule["end"] = "#{rule['end']}|#{@end_bailout}" if rule["end"]
+        rule["while"] = "#{rule['while']}|(?:#{@while_bailout})" if rule["while"]
+        rule["patterns"]&.map! { |pat| rewrite_rule(non_duplicate, pat) }
+        if rule[:repository]
+            rule[:repository] = Hash[
+                rule[:repository].map do |key, pat|
+                    next [key, pat] if non_duplicate.include? key
+                    [
+                        "#{@prefix}_#{key}",
+                        rewrite_rule(non_duplicate, pat),
+                    ]
+                end
+            ]
+        end
+        rule["captures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+        rule["beginCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+        rule["endCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+        rule["whileCaptures"]&.transform_values { |pat| rewrite_rule(non_duplicate, pat) }
+        rule
+    end
+    def collect_non_duplicate(rule, repository_name = nil)
+        if rule["match"]
+            return [repository_name] if repository_name
+            return []
+        end
+        if rule["patterns"]
+            if rule.length == 1
+                non_duplicate = rule["patterns"].reduce([]) do |memo, pat|
+                    next memo if memo.nil?
+                    non_duplicate_nested = collect_non_duplicate(pat)
+                    next nil if non_duplicate_nested.nil?
+                    next memo.concat(non_duplicate_nested)
+                end
+                unless non_duplicate.nil?
+                    return [repository_name] if repository_name
+                    return []
+                end
+            end
+        end
+        if rule[:repository]
+            return rule[:repository].keys.reduce([]) do |memo, key|
+                non_duplicate = collect_non_duplicate(rule[:repository][key], key)
+                memo.concat(non_duplicate) if non_duplicate
+                next memo
+            end
+        end
+        nil
+    end
+    def post_transform(grammar_hash)
+        non_duplicate = collect_non_duplicate(grammar_hash)
+        pp non_duplicate
+        duplicate = rewrite_rule(
+            (non_duplicate.nil? ? [] : non_duplicate),
+            grammar_hash.__deep_clone__,
+        )
+        pp grammar_hash[:repository]
+        grammar_hash.__deep_clone__.merge(duplicate)
+    end
+end