RubyGems - fop_lang - Versions diffs - 0.2.0 → 0.3.0 - Mend

fop_lang 0.2.0 → 0.3.0

Files changed (8) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 711af4fad2907616e057049dcb84bce16ffdb381b3601a387dc7260cc16057d3
-  data.tar.gz: 40da554ca0cb21f275748593921bdc616dcf8cdfe5d4fa31494f588f2c25d66c
+  metadata.gz: b5f19a543b81c0046dc63fcc1c0769989628017d2c1d1da74ef0db9866a0f2f7
+  data.tar.gz: 03b6597f9cab97c95ccda8396693bb43d9da729137cb916cc74f7fbecc314b32
 SHA512:
-  metadata.gz: 0253a6446b88b6de112f00a95c81f1d5f710a859998e5b3d8df21d64caecc8e013ab1edec2b68047558f8053ce135b60082078f85db1f7aa16a93b86aa487093
-  data.tar.gz: c7f0cb0387df52a3ea121e3e91a35e0444afe8862aabe054ec433d889955ee9221129fd0c07f7e36d6c71e6b9ab33f30cd16264447dfda24d29babb671112140
+  metadata.gz: 3a17c82a561e20cbc5cb8abbad5be4f94f02110d60b6130e3e1e9489672c5c134befc6b1daca2f590f083a67934e600fb5d6fa0ea5433181ba3014514c558232
+  data.tar.gz: 790250c8a79dcf04b381f2dd33cbaa048fd070688ab45446ff87652dcb18844c2d6139d0ead060fa338a57b8590eee0167ea2c25abd84e1d71571f33c49bcbda

data/README.md CHANGED Viewed

@@ -1,6 +1,10 @@
 # fop_lang
-Fop (Filter and OPperations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
+Fop (Filter and OPerations language) is an experimental, tiny expression language in the vein of awk and sed. This is a Ruby implementation. It is useful for simultaneously matching and transforming text input.
+```ruby
+gem 'fop_lang'
+```
 ## Release Number Example
@@ -33,13 +37,14 @@ If `\` (escape) is used before the special characters `*`, `{` or `}`, then that
 Operations are the interesting part of Fop, and are specified between `{` and `}`. An Operation can consist of one to three parts:
-1. Matching character class (required): Defines what characters the operation will match and operate on.
+1. Matching class (required): Defines what characters the operation will match and operate on.
   * `N` is the numeric class and will match one or more digits.
   * `A` is the alpha class and will match one or more letters (lower or upper case).
   * `W` is the word class and matches alphanumeric chars and underscores.
   * `*` is the wildcard class and greedily matches everything after it.
+  * `/.../` matches on the supplied regex between the `/`'s. If your regex contains a `/`, it must be escaped.
 3. Operator (optional): What to do to the matching characters.
-  * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars.
+  * `=` Replace the matching character(s) with the given argument. If no argument is given, drop the matching chars. Note that any `/` chars must be escaped, so as not to be mistaken for a regex.
   * `+` Perform addition on the matching number and the argument (`N` only).
   * `-` Subtract the argument from the matching number (`N` only).
 5. Operator argument (required for some operators): meaning varies by operator.
@@ -53,6 +58,16 @@ Operations are the interesting part of Fop, and are specified between `{` and `}
   =>           'release-5.100.0'
 ```
+```ruby
+  f = Fop('rel{/(ease)?/}-{N=5}.{N+1}.{N=0}')
+  puts f.apply('release-4.99.1')
+  =>           'release-5.100.0'
+  puts f.apply('rel-4.99.1')
+  =>           'rel-5.100.0'
+```
 ```ruby
   f = Fop('release-*{N=5}.{N+100}.{N=0}')

data/lib/fop/nodes.rb CHANGED Viewed

@@ -12,59 +12,19 @@ module Fop
       end
     end
-    Match = Struct.new(:wildcard, :tokens) do
-      NUM = "N".freeze
-      WORD = "W".freeze
-      ALPHA = "A".freeze
-      WILD = "*".freeze
-      BLANK = "".freeze
+    Op = Struct.new(:wildcard, :match, :regex_match, :regex, :operator, :operator_arg, :expression) do
       def consume!(input)
-        if (val = input.slice!(@regex))
-          @expression && val != BLANK ? @expression.call(val) : val
+        if (val = input.slice!(regex))
+          found_val = regex_match || val != Parser::BLANK
+          expression && found_val ? expression.call(val) : val
         end
       end
       def to_s
         w = wildcard ? "*" : nil
-        @op ? "#{w}#{@match} #{@op} #{@arg}" : "#{w}#{@match}"
-      end
-      def parse!
-        match = tokens.shift || raise(ParserError, "Empty match")
-        raise ParserError, "Unexpected #{match}" unless match.is_a? Tokenizer::Char
-        @match = match.char
-        @regex =
-          case @match
-          when NUM then Regexp.new((wildcard ? ".*?" : "^") + "[0-9]+")
-          when WORD then Regexp.new((wildcard ? ".*?" : "^") + "\\w+")
-          when ALPHA then Regexp.new((wildcard ? ".*?" : "^") + "[a-zA-Z]+")
-          when WILD then /.*/
-          else raise ParserError, "Unknown match type '#{@match}'"
-          end
-        if (op = tokens.shift)
-          raise ParserError, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
-          arg = tokens.reduce("") { |acc, t|
-            raise ParserError, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
-            acc + t.char
-          }
-          @op = op.char
-          @arg = arg == BLANK ? nil : arg
-          @expression =
-            case @op
-            when "=" then ->(_) { @arg || BLANK }
-            when "+", "-", "*", "/"
-              raise ParserError, "Operator #{@op} is only available for numeric matches" unless @match == NUM
-              raise ParserError, "Operator #{@op} expects an argument" if @arg.nil?
-              ->(x) { x.to_i.send(@op, @arg.to_i) }
-            else raise ParserError, "Unknown operator #{@op}"
-            end
-        else
-          @op, @arg, @expression = nil, nil, nil
-        end
+        s = "#{w}#{match}"
+        s << " #{operator} #{operator_arg}" if operator
+        s
       end
     end
   end

data/lib/fop/parser.rb CHANGED Viewed

@@ -4,89 +4,132 @@ module Fop
   module Parser
     Error = Class.new(StandardError)
+    MATCH_NUM = "N".freeze
+    MATCH_WORD = "W".freeze
+    MATCH_ALPHA = "A".freeze
+    MATCH_WILD = "*".freeze
+    BLANK = "".freeze
+    OP_REPLACE = "=".freeze
+    OP_ADD = "+".freeze
+    OP_SUB = "-".freeze
+    OP_MUL = "*".freeze
+    OP_DIV = "/".freeze
     def self.parse!(tokens)
-      stack = []
-      current_el = nil
+      nodes = []
+      curr_node = nil
       tokens.each { |token|
-        case current_el
+        case curr_node
         when nil
-          current_el = new_element token
+          curr_node = new_node token
         when :wildcard
-          current_el = new_element token, true
-          raise Error, "Unexpected * after wildcard" if current_el == :wildcard
+          curr_node = new_node token, true
+          raise Error, "Unexpected * after wildcard" if curr_node == :wildcard
         when Nodes::Text
-          current_el = parse_text stack, current_el, token
-        when Nodes::Match
-          current_el = parse_match stack, current_el, token
+          curr_node, finished_node = parse_text curr_node, token
+          nodes << finished_node if finished_node
+        when Nodes::Op
+          nodes << curr_node
+          curr_node = new_node token
         else
-          raise Error, "Unexpected token #{token} in #{current_el}"
+          raise Error, "Unexpected node #{curr_node}"
         end
       }
-      case current_el
+      case curr_node
       when nil
         # noop
       when :wildcard
-        stack << Nodes::Text.new(true, "")
-      when Nodes::Text
-        stack << current_el
-      when Nodes::Match
-        raise Error, "Unclosed match"
+        nodes << Nodes::Text.new(true, "")
+      when Nodes::Text, Nodes::Op
+        nodes << curr_node
+      else
+        raise "Unexpected end node #{curr_node}"
       end
-      stack
+      nodes
     end
     private
-    def self.new_element(token, wildcard = false)
+    def self.new_node(token, wildcard = false)
       case token
       when Tokenizer::Char
         Nodes::Text.new(wildcard, token.char.clone)
-      when :match_open
-        Nodes::Match.new(wildcard, [])
-      when :match_close
-        raise ParserError, "Unmatched }"
+      when Tokenizer::Op
+        op = Nodes::Op.new(wildcard)
+        parse_op! op, token.tokens
+        op
       when :wildcard
         :wildcard
       else
-        raise ParserError, "Unexpected #{token}"
+        raise Error, "Unexpected #{token}"
       end
     end
-    def self.parse_text(stack, text_el, token)
+    # @return current node
+    # @return finished node
+    def self.parse_text(node, token)
       case token
-      when :match_open
-        stack << text_el
-        Nodes::Match.new(false, [])
-      when :match_close
-        raise ParserError.new, "Unexpected }"
       when Tokenizer::Char
-        text_el.str << token.char
-        text_el
+        node.str << token.char
+        return node, nil
+      when Tokenizer::Op
+        op = new_node token
+        return op, node
       when :wildcard
-        stack << text_el
-        :wildcard
+        return :wildcard, node
       else
-        raise ParserError, "Unexpected #{token}"
+        raise Error, "Unexpected #{token}"
       end
     end
-    def self.parse_match(stack, match_el, token)
-      case token
-      when Tokenizer::Char
-        match_el.tokens << token
-        match_el
-      when :wildcard
-        match_el.tokens << Tokenizer::Char.new("*").freeze
-        match_el
-      when :match_close
-        match_el.parse!
-        stack << match_el
-        nil
-      else
-        raise ParserError, "Unexpected #{token}"
+    def self.parse_op!(node, tokens)
+      t = tokens[0] || raise(Error, "Empty operation")
+      # parse the matching type
+      node.regex =
+        case t
+        when Tokenizer::Char
+          node.match = t.char
+          node.regex_match = false
+          case t.char
+          when MATCH_NUM then Regexp.new((node.wildcard ? ".*?" : "^") + "[0-9]+")
+          when MATCH_WORD then Regexp.new((node.wildcard ? ".*?" : "^") + "\\w+")
+          when MATCH_ALPHA then Regexp.new((node.wildcard ? ".*?" : "^") + "[a-zA-Z]+")
+          when MATCH_WILD then /.*/
+          else raise Error, "Unknown match type '#{t.char}'"
+          end
+        when Tokenizer::Regex
+          node.match = "/#{t.src}/"
+          node.regex_match = true
+          Regexp.new((node.wildcard ? ".*?" : "^") + t.src)
+        else
+          raise Error, "Unexpected token #{t}"
+        end
+      # parse the operator (if any)
+      if (op = tokens[1])
+        raise Error, "Unexpected #{op}" unless op.is_a? Tokenizer::Char
+        node.operator = op.char
+        arg = tokens[2..-1].reduce("") { |acc, t|
+          raise Error, "Unexpected #{t}" unless t.is_a? Tokenizer::Char
+          acc + t.char
+        }
+        node.operator_arg = arg == BLANK ? nil : arg
+        node.expression =
+          case node.operator
+          when OP_REPLACE
+            ->(_) { node.operator_arg || BLANK }
+          when OP_ADD, OP_SUB, OP_MUL, OP_DIV
+            raise Error, "Operator #{node.operator} is only available for numeric matches" unless node.match == MATCH_NUM
+            raise Error, "Operator #{node.operator} expects an argument" if node.operator_arg.nil?
+            ->(x) { x.to_i.send(node.operator, node.operator_arg.to_i) }
+          else
+            raise(Error, "Unknown operator #{node.operator}")
+          end
       end
     end
   end

data/lib/fop/program.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Fop
     attr_reader :nodes
     def initialize(src)
-      tokens = Tokenizer.tokenize! src
+      tokens = Tokenizer.new(src).tokenize!
       @nodes = Parser.parse! tokens
     end

data/lib/fop/tokenizer.rb CHANGED Viewed

@@ -1,34 +1,123 @@
 module Fop
-  module Tokenizer
+  class Tokenizer
     Char = Struct.new(:char)
+    Op = Struct.new(:tokens)
+    Regex = Struct.new(:src)
     Error = Class.new(StandardError)
-    def self.tokenize!(src)
+    OP_OPEN = "{".freeze
+    OP_CLOSE = "}".freeze
+    ESCAPE = "\\".freeze
+    WILDCARD = "*".freeze
+    REGEX_MARKER = "/".freeze
+    def initialize(src)
+      @src = src
+      @end = src.size - 1
+    end
+    def tokenize!
       tokens = []
       escape = false
-      src.each_char { |char|
+      i = 0
+      until i > @end do
+        char = @src[i]
         if escape
           tokens << Char.new(char)
           escape = false
+          i += 1
           next
         end
         case char
-        when "\\".freeze
+        when ESCAPE
           escape = true
-        when "{".freeze
-          tokens << :match_open
-        when "}".freeze
-          tokens << :match_close
-        when "*".freeze
+          i += 1
+        when OP_OPEN
+          i, op = operation! i + 1
+          tokens << op
+        when OP_CLOSE
+          raise "Unexpected #{OP_CLOSE}"
+        when WILDCARD
           tokens << :wildcard
+          i += 1
         else
           tokens << Char.new(char)
+          i += 1
         end
-      }
+      end
       raise Error, "Trailing escape" if escape
       tokens
     end
+    private
+    def operation!(i)
+      escape = false
+      found_close = false
+      tokens = []
+      until found_close or i > @end do
+        char = @src[i]
+        if escape
+          tokens << Char.new(char)
+          escape = false
+          i += 1
+          next
+        end
+        case char
+        when ESCAPE
+          escape = true
+          i += 1
+        when OP_OPEN
+          raise "Unexpected #{OP_OPEN}"
+        when OP_CLOSE
+          found_close = true
+          i += 1
+        when REGEX_MARKER
+          i, reg = regex! i + 1
+          tokens << reg
+        else
+          tokens << Char.new(char)
+          i += 1
+        end
+      end
+      raise Error, "Unclosed operation" if !found_close
+      raise Error, "Trailing escape" if escape
+      return i, Op.new(tokens)
+    end
+    def regex!(i)
+      escape = false
+      found_close = false
+      src = ""
+      until found_close or i > @end
+        char = @src[i]
+        i += 1
+        if escape
+          src << char
+          escape = false
+          next
+        end
+        case char
+        when ESCAPE
+          escape = true
+        when REGEX_MARKER
+          found_close = true
+        else
+          src << char
+        end
+      end
+      raise Error, "Unclosed regex" if !found_close
+      raise Error, "Trailing escape" if escape
+      return i, Regex.new(src)
+    end
   end
 end

data/lib/fop/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Fop
-  VERSION = "0.2.0"
+  VERSION = "0.3.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fop_lang
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Jordan Hollinger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-15 00:00:00.000000000 Z
+date: 2021-08-16 00:00:00.000000000 Z
 dependencies: []
 description: A micro expression language for Filter and OPerations on text
 email: jordan.hollinger@gmail.com