sparkql 1.2.1 → 1.2.2
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- checksums.yaml +8 -8
- data/CHANGELOG.md +4 -0
- data/GRAMMAR.md +63 -38
- data/VERSION +1 -1
- data/lib/sparkql/lexer.rb +11 -1
- data/lib/sparkql/parser.rb +318 -181
- data/lib/sparkql/parser.y +93 -64
- data/lib/sparkql/parser_tools.rb +122 -10
- data/lib/sparkql/token.rb +8 -1
- data/test/unit/parser_test.rb +258 -0
- metadata +2 -2
data/lib/sparkql/parser.y
CHANGED
```diff
@@ -6,7 +6,7 @@ class Sparkql::Parser
 
 ###############################################################################
 # READ THIS!
-# The grammar documentation is parsed from this file and is in a sensitive
+# The grammar documentation is parsed from this file and is in a sensitive
 # syntax between the START_MARKDOWN and STOP_MARKDOWN keywords. In general, all
 # line comments will be treated as markdown text, and everything else is padded
 # for code formatting
@@ -15,24 +15,26 @@ class Sparkql::Parser
 #START_MARKDOWN
 
 ### SparkQL BNF Grammar
-#
+#
 # This document explains the rules for the Spark API filter language syntax and
-# is a living document generated from the reference implementation at
+# is a living document generated from the reference implementation at
 # https://github.com/sparkapi/sparkql.
 
 #### Precedence Rules
-#
-# Unless otherwise specified, SparkQL follows SQL precendence conventions for
+#
+# Unless otherwise specified, SparkQL follows SQL precendence conventions for
 # operators and conjunctions.
-#
+#
 # Unary minus is always tied to value, such as for negative numbers.
 prechigh
   nonassoc UMINUS
+  left MUL DIV MOD
+  left ADD SUB
 preclow
-
+
 
 #### Grammar Rules
-#
+#
 # A filter (target) is a composition of filter basic filter expressions.
 rule
   target
```
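The new precedence declarations give `Mul`, `Div`, and `Mod` a tighter binding than `Add` and `Sub`, with unary minus highest, mirroring SQL. A minimal sketch of what this implies for a filter string (the field names and values are assumed examples, not taken from the package's test suite):

```ruby
# With `left MUL DIV MOD` declared above `left ADD SUB`, the field-side
# expression groups as Baths Add (2 Mul 3), not (Baths Add 2) Mul 3.
filter = "Baths Add 2 Mul 3 Eq 11"

# `left` also makes same-precedence operators chain left to right,
# i.e. (Baths Sub 1) Sub 2.
filter = "Baths Sub 1 Sub 2 Eq 0"
```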
```diff
@@ -41,7 +43,7 @@ rule
     ;
 
   ##### Expressions
-  #
+  #
   # One or more expressions
   expressions
     : expression
@@ -50,66 +52,84 @@ rule
     ;
 
   ##### Expression
-  #
-  # The core of the filtering system, the expression requires a field, a condition
-  # and criteria for comparing the value of the field to the value(s) of the
-  # condition. The result of evaluating the expression on a resource is a true of
-  # false for matching the criteria.
+  #
+  # The core of the filtering system, the expression requires a field, a condition
+  # and criteria for comparing the value of the field to the value(s) of the
+  # condition. The result of evaluating the expression on a resource is a true of
+  # false for matching the criteria. We are separating functions and arithmetic
+  # based on if we are acting on the field side or the literal side. This is to
+  # allow literal folding on the literal side.
   expression
-    : field OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
-    | field RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
+    : field_expression OPERATOR condition { result = tokenize_expression(val[0], val[1],val[2]) }
+    | field_expression RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
     | group
     ;
-
+
   ##### Unary Conjunction
-  #
-  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
+  #
+  # Some conjunctions don't need to expression at all times (e.g. 'NOT').
   unary_conjunction
     : UNARY_CONJUNCTION expression { result = tokenize_unary_conjunction(val[0], val[1]) }
-    ;
-
+    ;
+
   ##### Conjunction
-  #
+  #
   # Two expressions joined together using a supported conjunction
   conjunction
     : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     | expressions UNARY_CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1],val[2]) }
     ;
-
+
   ##### Group
-  #
+  #
   # One or more expressions encased in parenthesis. There are limitations on nesting depth at the time of this writing.
+
   group
-    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
-    ;
+    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
+    ;
+
+  field_expression
+    : field_arithmetic_expression
+    ;
+
+  field_arithmetic_expression
+    : field_arithmetic_expression ADD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression SUB field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MUL field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression DIV field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | field_arithmetic_expression MOD field_arithmetic_expression { result = tokenize_arithmetic(val[0], val[1], val[2]) }
+    | literals
+    | field_function_expression
+    ;
+
+  field_function_expression
+    : field
+    | function
+    ;
 
-  ##### Field
-  #
-  # Keyword for searching on, these fields should be discovered using the metadata
-  # rules. In general, Keywords that cannot be found will be dropped from the
-  # filter.
-  field
-    : STANDARD_FIELD
-    | CUSTOM_FIELD
-    | function
-    ;
-
   ##### Condition
-  #
-  # The determinant of the filter, this is typically a value or set of values of
-  # a type that the field supports (review the field meta data for support).
+  #
+  # The determinant of the filter, this is typically a value or set of values of
+  # a type that the field supports (review the field meta data for support).
   # Functions are also supported on some field types, and provide more flexibility
   # on filtering values
   condition
-    : literal
-    | literal_function
+    : arithmetic_condition
     | literal_list { result = tokenize_list(val[0]) }
+    | literal
     ;
-
+
+  arithmetic_condition
+    : condition ADD condition { result = add_fold(val[0], val[2]) }
+    | condition SUB condition { result = sub_fold(val[0], val[2]) }
+    | condition MUL condition { result = mul_fold(val[0], val[2]) }
+    | condition DIV condition { result = div_fold(val[0], val[2]) }
+    | condition MOD condition { result = mod_fold(val[0], val[2]) }
+
   ##### Function
-  #
-  # Functions may replace static values for conditions with supported field
-  # types. Functions may have parameters that match types supported by
+  #
+  # Functions may replace static values for conditions with supported field
+  # types. Functions may have parameters that match types supported by
   # fields.
   function
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
```
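The grammar now splits arithmetic by side: `field_arithmetic_expression` builds a nested `:arithmetic` node via `tokenize_arithmetic` when a field is involved, while `arithmetic_condition` folds literal-only arithmetic immediately through the `*_fold` helpers. A sketch of the two cases (assumed example filters):

```ruby
# Field-side arithmetic: kept as a tree and evaluated per record.
field_side = "Baths Add 1 Eq 3"

# Literal-side arithmetic: both operands are literals, so the parser can
# fold 10 Sub 4 into the single condition value 6 at parse time.
literal_side = "Baths Eq 10 Sub 4"
```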
```diff
@@ -120,13 +140,13 @@ rule
     : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
     | function_name LPAREN literal_function_args RPAREN { result = tokenize_function(val[0], val[2]) }
     ;
-
+
   function_name
     : KEYWORD
     ;
-
+
   ##### Function Arguments
-  #
+  #
   # Functions may optionally have a comma delimited list of parameters.
   function_args
     : function_arg
@@ -134,9 +154,9 @@ rule
     ;
 
   function_arg
-    : literal
+    : field_function_expression { result = tokenize_field_arg(val[0]) }
+    | literal
     | literals
-    | field { result = tokenize_field_arg(val[0]) }
     ;
 
   literal_function_args
@@ -147,11 +167,10 @@ rule
   literal_function_arg
     : literal
     | literals
-    | literal_function
     ;
 
   ##### Literal List
-  #
+  #
   # A comma delimited list of functions and values.
   literal_list
     : literals
@@ -159,17 +178,17 @@ rule
     | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
     | literal_list COMMA function { result = tokenize_multiple(val[0], val[2]) }
     ;
-
+
   ##### Range List
-  #
-  # A comma delimited list of values that support ranges for the Between operator
+  #
+  # A comma delimited list of values that support ranges for the Between operator
   # (see rangeable).
-  range
+  range
     : rangeable COMMA rangeable { result = tokenize_multiple(val[0], val[2]) }
     ;
 
   ##### Literals
-  #
+  #
   # Literals that support multiple values in a list for a condition
   literals
     : INTEGER
@@ -178,9 +197,9 @@ rule
     | LPAREN literals RPAREN { result = val[1] }
     | UMINUS literals { result = tokenize_literal_negation(val[1]) }
     ;
-
+
   ##### Literal
-  #
+  #
   # Literals only support a single value in a condition
   literal
     : DATE
@@ -191,8 +210,8 @@ rule
     ;
 
   ##### Range List
-  #
-  # Functions, and literals that can be used in a range
+  #
+  # Functions, and literals that can be used in a range
   rangeable
     : INTEGER
     | DECIMAL
@@ -202,9 +221,19 @@ rule
     | function
     ;
 
+  ##### Field
+  #
+  # Keyword for searching on, these fields should be discovered using the metadata
+  # rules. In general, Keywords that cannot be found will be dropped from the
+  # filter.
+  field
+    : STANDARD_FIELD
+    | CUSTOM_FIELD
+    ;
+
   #STOP_MARKDOWN
 
-
+
 end
 
 ---- header
@@ -212,7 +241,7 @@ end
 ---- inner
   include Sparkql::ParserTools
   include Sparkql::ParserCompatibility
-
+
 ---- footer
 
 # END PARSER
```
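Callers reach the new grammar through the same entry point as before; `parse` is defined in `parser_tools.rb` below. A minimal usage sketch (the filter string and the exact shape of the returned token stream are assumptions for illustration):

```ruby
require 'sparkql'

parser = Sparkql::Parser.new
expressions = parser.parse("Baths Add 2 Eq 5")

# Each parsed expression is a Hash; per tokenize_expression below, an
# expression whose field side used Add/Sub/Mul/Div/Mod carries the nested
# :arithmetic tree under :field_manipulations.
```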
data/lib/sparkql/parser_tools.rb
CHANGED
```diff
@@ -4,7 +4,8 @@ module Sparkql::ParserTools
   # Coercible types from highest precision to lowest
   DATE_TYPES = [:datetime, :date]
   NUMBER_TYPES = [:decimal, :integer]
-
+  ARITHMETIC_TYPES = [:decimal, :integer, :field, :arithmetic]
+
   def parse(str)
     @lexer = Sparkql::Lexer.new(str)
     @expression_count = 0
@@ -21,7 +22,34 @@ module Sparkql::ParserTools
     end
     t
   end
-
+
+  def arithmetic_field(nested_representation)
+    lhs = nested_representation[:lhs]
+    rhs = nested_representation[:rhs]
+
+    if lhs[:type] == :field
+      lhs[:value]
+    elsif rhs[:type] == :field
+      rhs[:value]
+    elsif lhs.key?(:field)
+      lhs[:field]
+    elsif rhs.key?(:field)
+      rhs[:field]
+    elsif lhs[:type] == :arithmetic
+      arithmetic_field(lhs)
+    elsif rhs[:type] == :arithmetic
+      arithmetic_field(rhs)
+    else
+      nil
+    end
+  end
+
+  def no_field_error(field, operator)
+    tokenizer_error(:token => field,
+      :expression => {operator: operator, conjuction: 'And', conjunction_level: 0, level: @lexer.level},
+      :message => "Each expression must evaluate a field", :status => :fatal )
+  end
+
   def tokenize_expression(field, op, val)
     operator = get_operator(val,op) unless val.nil?
 
```
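`arithmetic_field` recursively walks the nested tree built by `tokenize_arithmetic` to recover the field name a compound field expression refers to. A sketch of the shape it expects (the tree literal is an assumed example mirroring what `tokenize_arithmetic` in this same file produces):

```ruby
# Roughly what "Baths Add 2" produces on the field side:
tree = {
  type: :arithmetic,
  op: 'Add',
  lhs: { type: :field, value: 'Baths' },
  rhs: { type: :integer, value: 2 }
}
# arithmetic_field(tree) #=> "Baths", via the lhs[:type] == :field branch.
# A tree containing no field anywhere returns nil, which tokenize_expression
# turns into no_field_error ("Each expression must evaluate a field").
```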
```diff
@@ -34,6 +62,12 @@ module Sparkql::ParserTools
       end
       field_manipulations = field
       field = field[:field]
+    elsif field.is_a?(Hash) && field[:type] == :arithmetic
+      field_manipulations = field
+      field = arithmetic_field(field)
+      no_field_error(field, operator) if field.nil?
+    elsif field.is_a?(Hash)
+      no_field_error(field, operator)
     end
 
     custom_field = !field.nil? && field.is_a?(String) && field.start_with?('"')
@@ -45,10 +79,13 @@ module Sparkql::ParserTools
 
     if !field_manipulations.nil?
       # Keeping field_function and field_function_type for backward compatibility with datacon
-      expression.merge!(field_manipulations: field_manipulations,
-                        field_function: field_manipulations[:function_name],
-                        field_function_type: field_manipulations[:return_type],
-                        args: field_manipulations[:function_parameters])
+      expression.merge!(field_manipulations: field_manipulations)
+
+      if field_manipulations[:type] == :function
+        expression.merge!(field_function: field_manipulations[:function_name],
+                          field_function_type: field_manipulations[:return_type],
+                          args: field_manipulations[:function_parameters])
+      end
     end
 
     expression = val.merge(expression) unless val.nil?
@@ -93,6 +130,7 @@ module Sparkql::ParserTools
   end
 
   def tokenize_list(list)
+    return if list.nil?
     validate_multiple_values list[:value]
     list[:condition] ||= list[:value]
     list
@@ -147,13 +185,13 @@ module Sparkql::ParserTools
   end
 
   def tokenize_field_arg(field)
-    if field.is_a?(Hash)
-      field
-    else
+    if field.is_a?(String)
       {
         :type => :field,
        :value => field,
       }
+    else
+      field
     end
   end
 
```
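The rewritten `tokenize_field_arg` inverts the old `Hash` check: a bare `String` (a field name) is wrapped into a field token, and anything already structured, such as a function hash, now passes through unchanged. A sketch of the two paths when called in the parser's context (return values inferred from the code above):

```ruby
tokenize_field_arg("Baths")
# => { :type => :field, :value => "Baths" }

tokenize_field_arg({ type: :function, value: "now" })
# => { type: :function, value: "now" }   (returned as-is)
```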
```diff
@@ -182,7 +220,81 @@ module Sparkql::ParserTools
       result.nil? ? result : result.merge(:condition => "#{name}(#{condition_list.join(',')})")
     end
   end
-
+
+  def tokenize_arithmetic(lhs, operator, rhs)
+    lhs = {type: :field, value: lhs} if lhs.is_a?(String)
+    rhs = {type: :field, value: rhs} if rhs.is_a?(String)
+
+    arithmetic_error?(lhs)
+    arithmetic_error?(rhs)
+    {
+      type: :arithmetic,
+      op: operator,
+      lhs: lhs,
+      rhs: rhs
+    }
+  end
+
+  def arithmetic_error?(side)
+    side_type = side[:type] == :function ? side[:return_type] : side[:type]
+    return false unless (!ARITHMETIC_TYPES.include?(side_type) || !ARITHMETIC_TYPES.include?(side_type))
+
+    compile_error(:token => side[:value], :expression => side,
+      :message => "Error attempting arithmetic with type: #{side_type}",
+      :status => :fatal, :syntax => false, :constraint => true )
+    true
+  end
+
+  def add_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) + escape_value(n2)).to_s }
+  end
+
+  def sub_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) - escape_value(n2)).to_s }
+  end
+
+  def mul_fold(n1, n2)
+    return if arithmetic_error?(n1) || arithmetic_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) * escape_value(n2)).to_s }
+  end
+
+  def div_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) / escape_value(n2)).to_s }
+  end
+
+  def mod_fold(n1, n2)
+    return if arithmetic_error?(n1) ||
+              arithmetic_error?(n2) ||
+              zero_error?(n2)
+
+    { type: arithmetic_type(n1, n2), value: (escape_value(n1) % escape_value(n2)).to_s }
+  end
+
+  def arithmetic_type(num1, num2)
+    if (num1[:type] == :decimal || num2[:type] == :decimal)
+      :decimal
+    else
+      :integer
+    end
+  end
+
+  def zero_error?(number)
+    return unless escape_value(number) == 0
+
+    compile_error(:token => "#{number[:value]}", :expression => number,
+      :message => "Error attempting to divide by zero",
+      :status => :fatal, :syntax => false, :constraint => true )
+  end
+
   def on_error(error_token_id, error_value, value_stack)
     token_name = token_to_str(error_token_id)
     token_name.downcase!
```
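The `*_fold` helpers evaluate literal-side arithmetic at parse time: `arithmetic_type` promotes the result to `:decimal` if either operand is a decimal, and `zero_error?` rejects division or modulo by zero. A sketch of the intended behavior (token shapes assumed; `escape_value` is the gem's existing helper for converting a token to its Ruby value):

```ruby
n1 = { type: :integer, value: '10' }
n2 = { type: :decimal, value: '2.5' }

add_fold(n1, n2)
# => { type: :decimal, value: "12.5" }   (decimal wins in arithmetic_type)

zero = { type: :integer, value: '0' }
div_fold(n1, zero)
# => nil, after zero_error? records "Error attempting to divide by zero"
```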
data/lib/sparkql/token.rb
CHANGED
```diff
@@ -4,6 +4,14 @@ module Sparkql::Token
   LPAREN = /\(/
   RPAREN = /\)/
   KEYWORD = /[A-Za-z]+/
+
+  ADD = 'Add'
+  SUB = 'Sub'
+
+  MUL = 'Mul'
+  DIV = 'Div'
+  MOD = 'Mod'
+
   STANDARD_FIELD = /[A-Z]+[A-Za-z0-9]*/
   CUSTOM_FIELD = /^(\"([^$."][^."]+)\".\"([^$."][^."]*)\")/
   INTEGER = /^\-?[0-9]+/
```
```diff
@@ -20,5 +28,4 @@ module Sparkql::Token
   OPERATORS = ['Gt','Ge','Lt','Le'] + EQUALITY_OPERATORS
   UNARY_CONJUNCTIONS = ['Not']
   CONJUNCTIONS = ['And','Or']
-
 end
```
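Like the comparison operators (`Gt`, `Eq`, ...), the new arithmetic operators are defined as capitalized keyword strings rather than symbols, so filters spell them out in the text. An assumed example:

```ruby
# 'Add', 'Sub', 'Mul', 'Div', and 'Mod' are matched as literal keywords:
filter = "OriginalListPrice Sub ListPrice Gt 10000"
```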