sparkql 1.0.3 → 1.1.0

checksums.yaml CHANGED
@@ -1,15 +1,15 @@
  ---
  !binary "U0hBMQ==":
  metadata.gz: !binary |-
- Mjk5YmNlMjA3ZDg1N2VjMzIzZDczMWMzNzdiZGU2YTMwY2E0ZTZlYQ==
+ Y2ExNzBkYzg0N2RiZmFkOWRlMGI2NTg0NzJiOGQ5NzNlNTA2OTA5Mw==
  data.tar.gz: !binary |-
- MzJlZGNmM2Q5YTdiODA3M2UwNDNhNWNlZjdlZDU5MjMwY2RhNGU3OQ==
+ YmYyYTBkZGQxMTFkNGZhNDg2MTA1NmZjN2M4YjcyZTZiMWQ0Njk5Ng==
  SHA512:
  metadata.gz: !binary |-
- YjdhZDliN2QxZDY3NzkyNzc0Zjk4NTRlZGYwZmUzODRlNDUzMDBkNjY1ZDVh
- MjczZjI2ODc1Zjk0MzBlYWRlY2FhMTY3ZDAzYjgwOGIzYzY1NjM0MjQ0ZDQ5
- ODRlMmI1YTUyNTRjMmIyZmIwOTE2NjA2MTVhNTIxMjBhZTA4OGQ=
+ Mzk4MmY3OTE2MzNiYmEyOTg3NGMzOTdhZTRjY2NhZjhmZjgxMzE1NTJkM2Vj
+ OWIxZGU0YTBjNTI3M2ZhZjUxNjdlOTY4MTgzMDRmNjhiOWE3MjEwNTU4YWU0
+ NmQ4MDYwZDRkMDIxYzkyNWI1MjQxMThhYTFiNGFmOTZmNzA1MjI=
  data.tar.gz: !binary |-
- NDc1NjI2NjQ0OTg4ZTEzNzU1NmNkMjZiOGY4Mzk0MWM1OThlNDUyMGYxOWM4
- MzQ4MmVhZWE5NDVmZmUzNmRkZmNmYmFkNmNlNzYwN2Y0MGMyYjQyZTk4ZGMw
- NjRlMzg2MmEyMDc0MGU0MjJiNjM3NzQwNzA3NmVhMTNlZTAyZTU=
+ ZGEyMzQ1YzU2NDU5YjNhNmVkOTEwNjBlYTI2MWIxMmRiY2E3NWU1MjU3Nzg1
+ OTgxMzc3MTc3ZGI5Njc1NWZmNGJkY2FkMWNhZDdkNmY4NThkOWUyZmUzMWRj
+ NDViYmQzNzEzNjBkOThiMmViYjQ4YTRhNjBhYmVlYzE1NWQwMDg=
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
+ v1.1.0, 2016-07-28 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.3...v1.1.0))
+ -------------------
+ * [IMPROVEMENT] Evaluation class for sparkql boolean algebra processing
+
  v1.0.3, 2016-06-06 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.2...v1.0.3))
  -------------------
  * [IMPROVEMENT] Expression limit lifted to 75 expressions
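Usage note: the pieces added in this release fit together roughly as below. This is a minimal sketch assembled from the classes and tests later in this diff, not documented gem API; the base `Sparkql::ExpressionResolver` simply resolves every expression to `true`.

```ruby
require 'sparkql'

# Parse a filter, then evaluate it with the stock resolver, which treats
# every expression as a match.
expressions = Sparkql::Parser.new.parse("Test Eq true Or Test Eq false")

evaluator = Sparkql::Evaluator.new(Sparkql::ExpressionResolver.new)
evaluator.evaluate(expressions)  # => true
evaluator.processed_count        # => 1 (the Or short-circuits after the first match)
```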
data/VERSION CHANGED
@@ -1 +1 @@
- 1.0.3
+ 1.1.0
data/lib/sparkql/evaluator.rb ADDED
@@ -0,0 +1,152 @@
+ # Using an instance of ExpressionResolver to resolve the individual expressions,
+ # this class will evaluate the rest of a parsed sparkql string to true or false.
+ # Namely, this class will handle all the nesting, boolean algebra, and dropped
+ # fields. Plus, it has some optimizations built in to skip the processing for
+ # any expressions that don't contribute to the net result of the filter.
+ class Sparkql::Evaluator
+
+   attr_reader :processed_count
+
+   def initialize expression_resolver
+     @resolver = expression_resolver
+   end
+
+   def evaluate(expressions)
+     @processed_count = 0
+     @index = {
+       level: 0,
+       block_group: 0,
+       conjunction: "And",
+       conjunction_level: 0,
+       match: true,
+       good_ors: false,
+       expressions: 0
+     }
+     @groups = [@index]
+     expressions.each do |expression|
+       handle_group(expression)
+       adjust_expression_for_dropped_field(expression)
+       check_for_good_ors(expression)
+       next if skip?(expression)
+       evaluate_expression(expression)
+     end
+     cleanup
+     return @index[:match]
+   end
+
+   private
+
+   # prepare the group stack for the next expression
+   def handle_group(expression)
+     if @index[:block_group] == expression[:block_group]
+       # Noop
+     elsif @index[:block_group] < expression[:block_group]
+       @index = new_group(expression)
+       @groups.push(@index)
+     else
+       # Turn the group into an expression, resolve down to previous group(s)
+       smoosh_group(expression)
+     end
+   end
+
+   # Here's the real meat. We use an internal stack to represent the result of
+   # each block_group. This logic is re-used when merging the final result of one
+   # block group with the previous.
+   def evaluate_expression(expression)
+     @processed_count += 1
+     evaluate_node(expression, @resolver.resolve(expression))
+   end
+   def evaluate_node(node, result)
+     if result == :drop
+       @dropped_expression = node
+       return result
+     end
+     if node[:unary] == "Not"
+       result = !result
+     end
+     if node[:conjunction] == 'Not' &&
+        (node[:conjunction_level] == node[:level] ||
+         node[:conjunction_level] == @index[:level])
+       @index[:match] = !result
+     elsif node[:conjunction] == 'And' || @index[:expressions] == 0
+       @index[:match] = result if @index[:match]
+     elsif node[:conjunction] == 'Or' && result
+       @index[:match] = result
+     end
+     @index[:expressions] += 1
+     result
+   end
+
+   # Optimization logic, once we find any set of And'd expressions that pass and
+   # run into an Or at the same level, we can skip further processing at that
+   # level.
+   def check_for_good_ors(expression)
+     if expression[:conjunction] == 'Or'
+       good_index = @index
+       unless expression[:conjunction_level] == @index[:level]
+         good_index = nil
+         # Well crap, now we need to go back and find that level by hand
+         @groups.reverse_each do |i|
+           if i[:level] == expression[:conjunction_level]
+             good_index = i
+           end
+         end
+       end
+       if !good_index.nil? && good_index[:expressions] > 0 && good_index[:match]
+         good_index[:good_ors] = true
+       end
+     end
+   end
+
+   # We can skip further expression processing when And-d with a false expression
+   # or a "good Or" was already encountered.
+   def skip?(expression)
+     @index[:good_ors] ||
+       !@index[:match] && expression[:conjunction] == 'And'
+   end
+
+   def new_group(expression)
+     {
+       level: expression[:level],
+       block_group: expression[:block_group],
+       conjunction: expression[:conjunction],
+       conjunction_level: expression[:conjunction_level],
+       match: true,
+       good_ors: false,
+       expressions: 0
+     }
+   end
+
+   # When the last expression was dropped, we need to repair the filter by
+   # stealing the conjunction of that dropped field.
+   def adjust_expression_for_dropped_field(expression)
+     if @dropped_expression.nil?
+       return
+     elsif @dropped_expression[:block_group] == expression[:block_group]
+       expression[:conjunction] = @dropped_expression[:conjunction]
+       expression[:conjunction_level] = @dropped_expression[:conjunction_level]
+     end
+     @dropped_expression = nil
+   end
+
+   # This is similar to the cleanup step, but happens when we return from a
+   # nesting level. Before we can proceed, we need wrap up the result of the
+   # nested group.
+   def smoosh_group(expression)
+     until @groups.last[:block_group] == expression[:block_group]
+       last = @groups.pop
+       @index = @groups.last
+       evaluate_node(last, last[:match])
+     end
+   end
+
+   # pop off the group stack, evaluating each group with the previous as we go.
+   def cleanup
+     while @groups.size > 1
+       last = @groups.pop
+       @index = @groups.last
+       evaluate_node(last, last[:match])
+     end
+     @groups.last[:match]
+   end
+ end
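The group-stack handling above is easiest to see on a nested filter. A hedged illustration follows, using a throwaway `CannedResolver` (not part of the gem) that replays a fixed list of results; the expected values mirror the last assertions in `test_optimizations` further down in this diff.

```ruby
require 'sparkql'

# Illustration only: CannedResolver is hypothetical and simply replays the
# results it was given, one per resolved expression.
class CannedResolver < Sparkql::ExpressionResolver
  def initialize(results)
    @results = results
  end

  def resolve(_expression)
    @results.shift
  end
end

expressions = Sparkql::Parser.new.parse("(Test Eq false Or Test Eq true) And Test Eq true")
evaluator   = Sparkql::Evaluator.new(CannedResolver.new([false, true, true]))

# The nested group resolves to true (false Or true), is smooshed into the outer
# group on the way out, and the trailing And keeps the overall match true.
evaluator.evaluate(expressions)  # => true
evaluator.processed_count        # => 3 (nothing could be skipped here)
```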
data/lib/sparkql/expression_resolver.rb ADDED
@@ -0,0 +1,11 @@
+ # Base class for handling expression resolution
+ class Sparkql::ExpressionResolver
+
+   VALID_RESULTS = [true, false, :drop]
+
+   # Evaluate the result of this expression. Allows for any of the values in
+   # VALID_RESULTS
+   def resolve(expression)
+     true
+   end
+ end
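Consumers subclass this and override `resolve`, returning any value in `VALID_RESULTS`; `:drop` tells the evaluator to discard the expression and hand its conjunction to the next one. A hedged sketch of such a subclass is below: the `:field` key is an assumption about the parser's expression hashes (only `:type` and `:value` appear elsewhere in this diff), and `KnownFieldResolver`/`KNOWN_FIELDS` are invented for illustration.

```ruby
require 'sparkql'

# Hypothetical resolver: treat expressions on recognized fields as matches and
# drop everything else, so unknown fields never veto the whole filter.
class KnownFieldResolver < Sparkql::ExpressionResolver
  KNOWN_FIELDS = %w(City MlsStatus PropertyType)  # made-up list for the sketch

  def resolve(expression)
    KNOWN_FIELDS.include?(expression[:field]) ? true : :drop
  end
end

expressions = Sparkql::Parser.new.parse("City Eq 'Fargo' And Unknown Eq 'x'")
Sparkql::Evaluator.new(KnownFieldResolver.new).evaluate(expressions)  # => true
```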
data/lib/sparkql/parser_tools.rb CHANGED
@@ -60,16 +60,17 @@ module Sparkql::ParserTools
  end
 
  def tokenize_unary_conjunction(conj, exp)
-
  # Handles the case when a SparkQL filter string
  # begins with a unary operator, and is nested, such as:
- # Not (Not Field Eq 1)
+ # Not (Not Field Eq 1)
+ # In this instance we treat the outer unary as a conjunction.
  if @expression_count == 1 && @lexer.level > 0
- exp.first[:conjunction] = conj
+ exp.first[:conjunction] = conj
+ exp.first[:conjunction_level] = @lexer.level - 1
  end
-
  exp.first[:unary] = conj
  exp.first[:unary_level] = @lexer.level
+
  exp
  end
 
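The practical effect of carrying the outer unary as a conjunction shows up when the evaluator runs. A small sketch, with assertions mirroring `test_nots` later in this diff; the stock resolver here treats every expression as a match.

```ruby
require 'sparkql'

resolver  = Sparkql::ExpressionResolver.new  # base resolver: everything resolves to true
evaluator = Sparkql::Evaluator.new(resolver)

evaluator.evaluate(Sparkql::Parser.new.parse("Not (Test Eq true)"))      # => false
evaluator.evaluate(Sparkql::Parser.new.parse("Not (Not Test Eq true)"))  # => true
```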
data/lib/sparkql.rb CHANGED
@@ -2,6 +2,8 @@ require "sparkql/version"
  require "sparkql/token"
  require "sparkql/errors"
  require "sparkql/expression_state"
+ require "sparkql/expression_resolver"
+ require "sparkql/evaluator"
  require "sparkql/lexer"
  require "sparkql/function_resolver"
  require "sparkql/parser_tools"
data/test/support/boolean_or_bust_expression_resolver.rb ADDED
@@ -0,0 +1,12 @@
+ # A super simple expression resolver for testing... returns the boolean value as
+ # the result for the expression, or when not a boolean, drops the expression.
+ class BooleanOrBustExpressionResolver < Sparkql::ExpressionResolver
+
+   def resolve(expression)
+     if expression[:type] == :boolean
+       "true" == expression[:value]
+     else
+       :drop
+     end
+   end
+ end
data/test/unit/evaluator_test.rb ADDED
@@ -0,0 +1,84 @@
+ require 'test_helper'
+ require 'support/boolean_or_bust_expression_resolver'
+
+ class EvaluatorTest < Test::Unit::TestCase
+   include Sparkql
+
+   def test_simple
+     assert sample('Test Eq true')
+     assert !sample('Test Eq false')
+     assert sample("Test Eq 'Drop'")
+   end
+
+   def test_conjunction
+     assert sample('Test Eq true And Test Eq true')
+     assert !sample('Test Eq false And Test Eq true')
+     assert !sample('Test Eq false And Test Eq false')
+     # Ors
+     assert sample("Test Eq true Or Test Eq true")
+     assert sample("Test Eq true Or Test Eq false")
+     assert sample("Test Eq false Or Test Eq true")
+     assert !sample("Test Eq false Or Test Eq false")
+   end
+
+   def test_dropped_field_handling
+     assert sample("Test Eq 'Drop' And Test Eq true")
+     assert !sample("Test Eq 'Drop' And Test Eq false")
+     assert !sample("Test Eq 'Drop' Or Test Eq false")
+     assert sample("Test Eq 'Drop' Or Test Eq true")
+     assert sample("Test Eq false And Test Eq 'Drop' Or Test Eq true")
+     assert sample("Test Eq false Or (Test Eq 'Drop' And Test Eq true)")
+   end
+
+   def test_nesting
+     assert sample("Test Eq true Or (Test Eq true) And Test Eq false And (Test Eq true)")
+     assert sample("Test Eq true Or ((Test Eq false) And Test Eq false) And (Test Eq false)")
+     assert sample("(Test Eq false Or Test Eq true) Or (Test Eq false Or Test Eq false)")
+     assert sample("(Test Eq true And Test Eq true) Or (Test Eq false)")
+     assert sample("(Test Eq true And Test Eq true) Or (Test Eq false And Test Eq true)")
+     assert !sample("(Test Eq false And Test Eq true) Or (Test Eq false)")
+     assert sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq true)")
+     assert !sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) And Test Eq true")
+     assert !sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq false")
+     assert sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq true")
+   end
+
+   def test_nots
+     assert !sample("Not Test Eq true")
+     assert sample("Not Test Eq false")
+     assert !sample("Not (Test Eq true)")
+     assert sample("Not (Test Eq false)")
+     assert sample("Test Eq true Not Test Eq false")
+     assert !sample("Test Eq true Not Test Eq true")
+     assert sample("Test Eq true Not (Test Eq false Or Test Eq false)")
+     assert sample("Test Eq true Not (Test Eq false And Test Eq false)")
+     assert !sample("Test Eq true Not (Test Eq false Or Test Eq true)")
+     assert !sample("Test Eq true Not (Test Eq true Or Test Eq false)")
+     assert !sample("Test Eq true Not (Not Test Eq false)")
+     assert sample("Not (Not Test Eq true)")
+     assert sample("Not (Not(Not Test Eq true))")
+   end
+
+   def test_optimizations
+     assert sample("Test Eq true Or Test Eq false And Test Eq false")
+     assert_equal 1, @evaluator.processed_count
+     assert sample("Test Eq false Or Test Eq true And Test Eq true")
+     assert_equal 3, @evaluator.processed_count
+     assert sample("(Test Eq true Or Test Eq false) And Test Eq true")
+     assert_equal 2, @evaluator.processed_count
+     assert sample("(Test Eq false Or Test Eq true) And Test Eq true")
+     assert_equal 3, @evaluator.processed_count
+   end
+
+   # Here's some examples from prospector's tests that have been simplified a bit.
+   def test_advanced
+     assert !sample("MlsStatus Eq false And PropertyType Eq true And (City Eq true Or City Eq false)")
+   end
+
+   def sample filter
+     @parser = Parser.new
+     @expressions = @parser.parse(filter)
+     @evaluator = Evaluator.new(BooleanOrBustExpressionResolver.new())
+     @evaluator.evaluate(@expressions)
+   end
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: sparkql
  version: !ruby/object:Gem::Version
- version: 1.0.3
+ version: 1.1.0
  platform: ruby
  authors:
  - Wade McEwen
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-07-06 00:00:00.000000000 Z
+ date: 2016-07-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: georuby
@@ -111,6 +111,8 @@ files:
  - VERSION
  - lib/sparkql.rb
  - lib/sparkql/errors.rb
+ - lib/sparkql/evaluator.rb
+ - lib/sparkql/expression_resolver.rb
  - lib/sparkql/expression_state.rb
  - lib/sparkql/function_resolver.rb
  - lib/sparkql/geo.rb
@@ -127,8 +129,10 @@ files:
  - script/markdownify.rb
  - script/release
  - sparkql.gemspec
+ - test/support/boolean_or_bust_expression_resolver.rb
  - test/test_helper.rb
  - test/unit/errors_test.rb
+ - test/unit/evaluator_test.rb
  - test/unit/expression_state_test.rb
  - test/unit/function_resolver_test.rb
  - test/unit/geo/record_circle_test.rb
@@ -160,8 +164,10 @@ signing_key:
  specification_version: 4
  summary: API Parser engine for filter searching
  test_files:
+ - test/support/boolean_or_bust_expression_resolver.rb
  - test/test_helper.rb
  - test/unit/errors_test.rb
+ - test/unit/evaluator_test.rb
  - test/unit/expression_state_test.rb
  - test/unit/function_resolver_test.rb
  - test/unit/geo/record_circle_test.rb