veritas-optimizer 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +4 -2
- data/Gemfile +8 -9
- data/Guardfile +3 -2
- data/README.rdoc +55 -0
- data/Rakefile +2 -2
- data/TODO +100 -98
- data/config/flay.yml +2 -2
- data/config/flog.yml +1 -1
- data/config/roodi.yml +2 -2
- data/config/site.reek +2 -2
- data/lib/veritas/optimizer/algebra/difference.rb +1 -1
- data/lib/veritas/optimizer/algebra/intersection.rb +1 -1
- data/lib/veritas/optimizer/algebra/join.rb +117 -1
- data/lib/veritas/optimizer/algebra/product.rb +1 -1
- data/lib/veritas/optimizer/algebra/rename.rb +12 -1
- data/lib/veritas/optimizer/algebra/restriction.rb +148 -0
- data/lib/veritas/optimizer/algebra/union.rb +1 -1
- data/lib/veritas/optimizer/function/connective/binary.rb +61 -8
- data/lib/veritas/optimizer/function/connective/conjunction.rb +1 -1
- data/lib/veritas/optimizer/function/connective/disjunction.rb +1 -1
- data/lib/veritas/optimizer/function/predicate/comparable.rb +4 -4
- data/lib/veritas/optimizer/relation/operation/binary.rb +1 -1
- data/lib/veritas/optimizer/support/predicate_partition.rb +182 -0
- data/lib/veritas/optimizer/version.rb +1 -1
- data/lib/veritas/optimizer.rb +2 -0
- data/spec/integration/veritas/algebra/rename/optimize_spec.rb +30 -30
- data/spec/integration/veritas/algebra/restriction/optimize_spec.rb +3 -3
- data/spec/integration/veritas/relation/operation/limit/optimize_spec.rb +1 -1
- data/spec/integration/veritas/relation/operation/offset/optimize_spec.rb +1 -1
- data/spec/integration/veritas/relation/operation/order/optimize_spec.rb +6 -6
- data/spec/integration/veritas/relation/operation/reverse/optimize_spec.rb +3 -3
- data/spec/spec_helper.rb +2 -1
- data/spec/unit/veritas/optimizer/algebra/extension/order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/extension/order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/join/left_materialized_operand/optimizable_spec.rb +42 -0
- data/spec/unit/veritas/optimizer/algebra/join/left_materialized_operand/optimize_spec.rb +55 -0
- data/spec/unit/veritas/optimizer/algebra/join/right_materialized_operand/optimizable_spec.rb +42 -0
- data/spec/unit/veritas/optimizer/algebra/join/right_materialized_operand/optimize_spec.rb +55 -0
- data/spec/unit/veritas/optimizer/algebra/rename/limit_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/limit_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/offset_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/offset_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/reverse_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/rename/reverse_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/restriction/combination_operand/optimizable_spec.rb +41 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/combination_operand/optimize_spec.rb +35 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/join_operand/optimizable_spec.rb +51 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/join_operand/optimize_spec.rb +48 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/restriction/order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/restriction/product_operand/optimizable_spec.rb +44 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/product_operand/optimize_spec.rb +48 -0
- data/spec/unit/veritas/optimizer/algebra/restriction/unoptimized_operand/optimize_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/algebra/summarization/empty_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/summarization/empty_summarize_per/optimize_spec.rb +7 -7
- data/spec/unit/veritas/optimizer/algebra/summarization/order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/algebra/summarization/order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/function/connective/conjunction/optimizable_to_exclusion/optimizable_spec.rb +38 -6
- data/spec/unit/veritas/optimizer/function/connective/conjunction/optimizable_to_exclusion/optimize_spec.rb +44 -6
- data/spec/unit/veritas/optimizer/function/connective/disjunction/optimizable_to_inclusion/optimizable_spec.rb +38 -6
- data/spec/unit/veritas/optimizer/function/connective/disjunction/optimizable_to_inclusion/optimize_spec.rb +44 -6
- data/spec/unit/veritas/optimizer/predicate_partition/left_spec.rb +149 -0
- data/spec/unit/veritas/optimizer/predicate_partition/remainder_spec.rb +149 -0
- data/spec/unit/veritas/optimizer/predicate_partition/right_spec.rb +149 -0
- data/spec/unit/veritas/optimizer/relation/operation/binary/left_order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/binary/left_order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/binary/{materialized_operand → materialized_operands}/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/binary/{materialized_operand → materialized_operands}/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/binary/right_order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/binary/right_order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/limit/equal_limit_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/limit/equal_limit_operand/optimize_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/limit/limit_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/limit/limit_operand/optimize_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/limit/unoptimized_operand/optimizable_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/limit/unoptimized_operand/optimize_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/limit/zero_limit/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/limit/zero_limit/optimize_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/offset/offset_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/offset/offset_operand/optimize_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/offset/unoptimized_operand/optimizable_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/offset/unoptimized_operand/optimize_spec.rb +4 -4
- data/spec/unit/veritas/optimizer/relation/operation/offset/zero_offset/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/offset/zero_offset/optimize_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/order/one_limit_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/order/one_limit_operand/optimize_spec.rb +2 -2
- data/spec/unit/veritas/optimizer/relation/operation/order/order_operand/optimizable_spec.rb +2 -2
- data/spec/unit/veritas/optimizer/relation/operation/order/order_operand/optimize_spec.rb +2 -2
- data/spec/unit/veritas/optimizer/relation/operation/order/unoptimized_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/order/unoptimized_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/reverse/order_operand/optimizable_spec.rb +2 -2
- data/spec/unit/veritas/optimizer/relation/operation/reverse/order_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/reverse/reverse_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/reverse/reverse_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/reverse/unoptimized_operand/optimizable_spec.rb +3 -3
- data/spec/unit/veritas/optimizer/relation/operation/reverse/unoptimized_operand/optimize_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/unary/order_operand/optimizable_spec.rb +1 -1
- data/spec/unit/veritas/optimizer/relation/operation/unary/order_operand/optimize_spec.rb +1 -1
- data/tasks/metrics/heckle.rake +1 -0
- data/veritas-optimizer.gemspec +33 -19
- metadata +37 -23
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -2,22 +2,21 @@
|
|
2
2
|
|
3
3
|
source :rubygems
|
4
4
|
|
5
|
-
gem 'veritas', '0.0.
|
5
|
+
gem 'veritas', '~> 0.0.5', :git => 'git://github.com/dkubb/veritas.git'
|
6
6
|
|
7
7
|
group :development do
|
8
|
-
gem 'backports', '~> 2.
|
9
|
-
gem 'jeweler', '~> 1.6.
|
10
|
-
gem 'rake', '~> 0.9.
|
8
|
+
gem 'backports', '~> 2.3.0'
|
9
|
+
gem 'jeweler', '~> 1.6.4'
|
10
|
+
gem 'rake', '~> 0.9.2'
|
11
11
|
gem 'rspec', '~> 1.3.2'
|
12
|
-
gem 'yard', '~> 0.7.
|
12
|
+
gem 'yard', '~> 0.7.2'
|
13
13
|
end
|
14
14
|
|
15
15
|
group :guard do
|
16
|
-
gem '
|
17
|
-
gem 'guard', '~> 0.3.4'
|
16
|
+
gem 'guard', '~> 0.5.1'
|
18
17
|
gem 'guard-bundler', '~> 0.1.3'
|
19
18
|
gem 'guard-ego', '~> 0.0.1'
|
20
|
-
gem 'guard-rspec', '~> 0.
|
19
|
+
gem 'guard-rspec', '~> 0.4.0'
|
21
20
|
end
|
22
21
|
|
23
22
|
platform :jruby do
|
@@ -31,7 +30,7 @@ platforms :mri_18 do
|
|
31
30
|
gem 'flay', '~> 1.4.2'
|
32
31
|
gem 'flog', '~> 2.5.1'
|
33
32
|
gem 'heckle', '~> 1.4.3'
|
34
|
-
gem 'json', '~> 1.5.
|
33
|
+
gem 'json', '~> 1.5.3'
|
35
34
|
gem 'metric_fu', '~> 2.1.1'
|
36
35
|
gem 'mspec', '~> 1.5.17'
|
37
36
|
gem 'rcov', '~> 0.9.9'
|
data/Guardfile
CHANGED
@@ -14,8 +14,9 @@ guard 'rspec' do
|
|
14
14
|
watch(%r{\Aspec/(?:lib|support|shared)/.+\.rb\z}) { 'spec' }
|
15
15
|
|
16
16
|
# run unit specs if associated lib code is modified
|
17
|
-
watch(%r{\Alib/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}"]
|
18
|
-
watch(
|
17
|
+
watch(%r{\Alib/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}"] }
|
18
|
+
watch(%r{\Alib/(veritas/optimizer)/support/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}/#{m[2]}"] }
|
19
|
+
watch("lib/#{File.basename(File.expand_path('../', __FILE__))}.rb") { 'spec' }
|
19
20
|
|
20
21
|
# run a spec if it is modified
|
21
22
|
watch(%r{\Aspec/(?:unit|integration)/.+_spec\.rb\z})
|
data/README.rdoc
CHANGED
@@ -4,6 +4,61 @@ Relational algebra optimizer
|
|
4
4
|
|
5
5
|
http://travis-ci.org/dkubb/veritas-optimizer.png
|
6
6
|
|
7
|
+
== Usage
|
8
|
+
|
9
|
+
# optimize a relation
|
10
|
+
new_relation = relation.optimize
|
11
|
+
new_relation = relation.optimize(optimizer)
|
12
|
+
|
13
|
+
# optimize a scalar function
|
14
|
+
new_function = function.optimize
|
15
|
+
new_function = function.optimize(optimizer)
|
16
|
+
|
17
|
+
# optimize an aggregate function
|
18
|
+
new_aggregate = aggregate.optimize
|
19
|
+
new_aggregate = aggregate.optimize(optimizer)
|
20
|
+
|
21
|
+
== Description
|
22
|
+
|
23
|
+
The purpose of this gem is to provide a simple API that can be used to optimize a veritas[https://github.com/dkubb/veritas] relation, scalar or aggregate function. An optional optimizer can be passed in to the #optimize method, which returns an equivalent but simplified version of the object.
|
24
|
+
|
25
|
+
One of the primary benefits of Relational Algebra is that it's based on logic, and the rules for simplifying logic are well known and studied. An optimizer can pass through user-generated objects and typically find ways to simplify or organize them in a way that will be more efficient when the operation is executed.
|
26
|
+
|
27
|
+
The goal is not to replace the advanced optimizers that are inside most databases and datastores, but to augment them with some simple optimizations that make the user-provided query easier for the datastore to accept. On the ruby side we have knowledge about intent and can perform semantic optimization that the datastore otherwise would not be able to perform. In many cases we have richer constraints and data than many datastores and we can use that information to simplify and possibly short-circuit queries that could otherwise never return valid results.
|
28
|
+
|
29
|
+
With the ability to provide custom optimizers we can even target output to a structure optimized for specific datastores. All operations in relational algebra can be transformed into other equivalent operations, ones that are more efficient for the target datastore to execute. The built-in optimizers included in this gem are only a starting point; the intention is to expand them as well as help others create custom optimizers that are optimized for each datastore.
|
30
|
+
|
31
|
+
== Design
|
32
|
+
|
33
|
+
The contract for an optimizer instance is simple:
|
34
|
+
|
35
|
+
a) it must respond to #call, and accept an optimizable object as its only argument
|
36
|
+
b) it must return an equivalent object
|
37
|
+
c) it must return the exact object when it cannot perform further optimizations
|
38
|
+
|
39
|
+
The optimizer can perform whatever logic it wishes on the object or any of its contained objects as far down the tree as it likes as long as the requirements for (b) and (c) are met.
|
40
|
+
|
41
|
+
Inside this gem we have the concept of an optimizer chain. It's a chain of responsibility, which means it's a set of objects chained together to form a pipeline. The object is passed into the head of the pipeline and is either matched and returned by one of the optimizers, or it is already fully optimized and passes through to the end of the chain and is returned as-is. This chain organization has proven to be extremely effective, and it is trivial to re-order or add new optimizers into the middle of the chain as needed. Further work will be made to provide APIs to make this even simpler.
|
42
|
+
|
43
|
+
Here is an example of an optimizer chain for the restriction operator (think WHERE clause in SQL):
|
44
|
+
|
45
|
+
Veritas::Algebra::Restriction.optimizer = chain(
|
46
|
+
Tautology, # does the predicate match everything?
|
47
|
+
Contradiction, # does the predicate match nothing?
|
48
|
+
RestrictionOperand, # does the restriction contain another restriction?
|
49
|
+
SetOperand, # does the restriction contain a set operation?
|
50
|
+
OrderOperand, # does the restriction contain an order?
|
51
|
+
EmptyOperand, # does the restriction contain an empty relation?
|
52
|
+
MaterializedOperand, # does the restriction contain a materialized relation?
|
53
|
+
UnoptimizedOperand # does the restriction contain an unoptimized relation?
|
54
|
+
)
|
55
|
+
|
56
|
+
The restriction operator enters this pipeline, and tests are made at each stage. If the test returns true, then an optimization is performed. Usually the goal is to eliminate work performed by the system or collapse the tree of operations down into something simpler. More aggressive optimizations are usually checked first because we would like to prune as much of the tree as possible up-front. In this case, the first test checks to see if the restriction matches everything, in which case it's pretty much a no-op, and we can drop it altogether. If that's not the case, we then test to see if it matches nothing, if that's the case then we can return an empty relation. Then we test if it's another restriction, in which case we can "AND" the predicates for both restrictions together and return a single restriction operation. And so on down the list with the first match winning.
|
57
|
+
|
58
|
+
We always perform at least two optimization passes on each object, because once a tree has been simplified there could be further optimizations possible. Essentially we keep passing the objects into their corresponding optimizer chains until it passes through unchanged. This may seem rather expensive, and I guess it is, but optimization is very fast. Also it doesn't appear to affect performance much in practice due to our convention of testing for the most aggressive optimizations first; often it results in something that is completely optimized on the first try.
|
59
|
+
|
60
|
+
Once the optimization passes are finished, and no further optimization is possible, the result of an #optimize call is memoized. Further calls to #optimize will always return the same object.
|
61
|
+
|
7
62
|
== Note on Patches/Pull Requests
|
8
63
|
|
9
64
|
* If you want your code merged into the mainline, please discuss
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require 'rake'
|
|
5
5
|
require File.expand_path('../lib/veritas/optimizer/version', __FILE__)
|
6
6
|
|
7
7
|
begin
|
8
|
-
gem('jeweler', '~> 1.6.
|
8
|
+
gem('jeweler', '~> 1.6.2') if respond_to?(:gem, true)
|
9
9
|
require 'jeweler'
|
10
10
|
|
11
11
|
Jeweler::Tasks.new do |gem|
|
@@ -23,5 +23,5 @@ begin
|
|
23
23
|
|
24
24
|
FileList['tasks/**/*.rake'].each { |task| import task }
|
25
25
|
rescue LoadError
|
26
|
-
puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler -v 1.6.
|
26
|
+
puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler -v 1.6.2'
|
27
27
|
end
|
data/TODO
CHANGED
@@ -1,25 +1,56 @@
|
|
1
|
-
*
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
* Pull up Rename rather than pushing it down
|
2
|
+
* Currently a Rename applies to every tuple in a result-set, and
|
3
|
+
doing the rename prematurely means that lots of tuples that would
|
4
|
+
otherwise be filtered out are going to be processed. A better
|
5
|
+
approach should be to push the rename up, dropping renames that
|
6
|
+
are projected away. It should not distribute over Binary ops
|
7
|
+
unless the other side of the binary op has the same aliases.
|
7
8
|
|
8
|
-
*
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
* For Binary operations, when the left or right is materialized then
|
10
|
+
change the other relation to use a restriction to filter the records.
|
11
|
+
* This will help optimize the fetching of the underlying records.
|
12
|
+
|
13
|
+
* Convert all the direct Veritas class usage to use the corresponding
|
14
|
+
methods. So for example instead of Algebra::Join.new(left, right) I
|
15
|
+
would want left.join(right).
|
16
|
+
* The reason for this is that as the Relation Gateway is created
|
17
|
+
it will have operands that will not be directly joinable without
|
18
|
+
gateway specific processing. By using the method directly I give the
|
19
|
+
chance for the gateway to step in and perform other work before
|
20
|
+
delegating up to the built-in operators in veritas.
|
21
|
+
|
22
|
+
* More optimizations:
|
23
|
+
* Union
|
24
|
+
* A Union of relations with the same base, header, and restrictions should
|
25
|
+
try to combine into a single relation with the restrictions using OR.
|
26
|
+
|
27
|
+
* Intersection
|
28
|
+
* An Intersection of relations with the same base, header, and restrictions
|
29
|
+
should try to combine into a single relation with the restrictions using
|
30
|
+
AND.
|
31
|
+
* Use the Join::RightMaterializedOperand and Join::LeftMaterializedOperand
|
32
|
+
optimizers to simplify Intersection operations
|
33
|
+
|
34
|
+
* Difference
|
35
|
+
* A Difference of relations with the same base and restrictions should
|
36
|
+
try to combine into a single relation with the restrictions using NOT.
|
37
|
+
|
38
|
+
* Summarization
|
39
|
+
* Add OrderSummarizePer for factoring out Order objects inside a Summarization
|
40
|
+
* When there are no aggregate functions, drop the Summarization and
|
41
|
+
return the summarize_per (?)
|
42
|
+
* Use the UnchangedHeader optimizer as a base class
|
43
|
+
* When summarize_per is an Order, the Order can be dropped.
|
15
44
|
|
16
|
-
* Add further optimizations:
|
17
45
|
* Projection
|
18
|
-
* When it contains a Rename, if the renamed attributes are removed,
|
19
|
-
then the rename can be removed.
|
20
46
|
* When it contains a Restriction, if the removed attributes are *not*
|
21
47
|
used in the predicate, then move the Restriction to contain the
|
22
48
|
Projection.
|
49
|
+
* When a Projection contains a Restriction, wrap the Projection
|
50
|
+
in the Restriction, projecting away any attributes not used in
|
51
|
+
the restriction. If there are any remaining attributes, then
|
52
|
+
wrap the operation in a Projection removing those attributes.
|
53
|
+
* If all attributes are being used in the Restriction do nothing
|
23
54
|
* When renames or extensions or summarizations are projected away and
|
24
55
|
not used in any intermediary operation, then they can be dropped
|
25
56
|
altogether. There's no point in going through all that work to
|
@@ -27,90 +58,61 @@
|
|
27
58
|
the renames/extensions/summarizations to not add the attribute, and
|
28
59
|
then call optimize again on the op to potentially remove the op altogether
|
29
60
|
(like in the case of an extension adding one attribute that is removed).
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
*
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
* "attr > 5 OR attr == 5" -> "attr >= 5"
|
54
|
-
* "attr < 5 OR attr == 5" -> "attr <= 5"
|
55
|
-
* "attr" = "string" AND "attr" =~ /string/ -> "attr" = "string"
|
56
|
-
* If the regexp matches the constant, then it should be
|
57
|
-
optimized down to a constant match. If it does not match
|
58
|
-
then it should be optimized to a Contradiction.
|
59
|
-
* Constant folding, eg:
|
60
|
-
"attr1 > attr2 AND attr1 = 5" -> "5 > attr2 AND attr1 = 5"
|
61
|
-
* This will probably only work across Conjunctions.
|
62
|
-
* "attr > 5 AND attr = 6" -> "attr = 6", because attr must be
|
63
|
-
equal to 6. this will probably be related to constant folding;
|
64
|
-
the first expression will become 6 > 5, which evaluates to a
|
65
|
-
Tautology, then the expression is a Tautology AND attr = 6,
|
66
|
-
which simplifies down to attr = 6.
|
67
|
-
* "attr < 5 AND attr = 6" -> "Contradiction", because attr must be equal to
|
68
|
-
6, and 6 < 5 evaluates to a Contradiction. A Contradiction AND attr = 6
|
69
|
-
simplifies down to a Contradiction.
|
70
|
-
* Figure out how to reorganize the Restriction predicates so that all
|
71
|
-
similar operations are closer together to allow more efficient
|
72
|
-
optimizations. This would allow optimizations of stuff like this:
|
61
|
+
* It does not distribute over Intersection or Difference, but see if
|
62
|
+
perhaps an exception can be made if there is a functional dependency
|
63
|
+
between the columns projected away and the one remaining. Then I *think*
|
64
|
+
it might still work, but more research will be needed.
|
65
|
+
* When a Projection contains a Join, wrap the Join with a Projection
|
66
|
+
of all the headers, minus those used in the Join. If there were
|
67
|
+
any used, then wrap the whole operation in a Projection with
|
68
|
+
the remaining attributes.
|
69
|
+
* If all the attributes are used in the Join, do nothing
|
70
|
+
* Try to use the same approach for Product
|
71
|
+
* Test if it's possible to fully distribute projections over
|
72
|
+
joins rather than splitting it up like this.
|
73
|
+
|
74
|
+
* Rename
|
75
|
+
* When wrapping a Summarization or Extension, and renaming the new attribute,
|
76
|
+
it should change the new attribute name, and remove it from the rename.
|
77
|
+
|
78
|
+
* Restriction
|
79
|
+
* Figure out how to reorganize the Restriction predicates so that all
|
80
|
+
similar operations are closer together to allow more efficient
|
81
|
+
optimizations. This would allow optimizations of stuff like this:
|
82
|
+
|
83
|
+
"attr1 = ? OR attr2 = ? OR attr1 = ?"
|
73
84
|
|
74
|
-
|
85
|
+
Into:
|
75
86
|
|
76
|
-
|
87
|
+
"attr1 IN(..) OR attr2 = ?"
|
77
88
|
|
78
|
-
|
79
|
-
|
80
|
-
*
|
81
|
-
|
82
|
-
throughout the whole tree.
|
83
|
-
* A Union of relations with the same base, header, and restrictions should
|
84
|
-
try to combine into a single relation with the restrictions using OR.
|
85
|
-
* An Intersection of relations with the same base, header, and restrictions
|
86
|
-
should try to combine into a single relation with the restrictions using
|
87
|
-
AND.
|
88
|
-
* A Difference of relations with the same base and restrictions should
|
89
|
-
try to combine into a single relation with the restrictions using NOT.
|
90
|
-
* Join Optimizations
|
91
|
-
* When a Join contains a Join, and the size of the base relations is
|
92
|
-
known, join the smallest with the largest, and then join that result
|
93
|
-
with the remaining relation.
|
94
|
-
* Make sure the smallest relation (with a known size) is always the
|
95
|
-
right-most operation.
|
96
|
-
* When a restriction uses a unique index:
|
97
|
-
* Order can be factored out
|
98
|
-
* Limit with a limit >= 1 can be factored out
|
99
|
-
* Offset with an offset > 0 can be transformed into an empty
|
100
|
-
relation, since at most there can be only one match.
|
89
|
+
* When it has an equality on a unique attribute:
|
90
|
+
* Limit with a limit >= 1 can be factored out
|
91
|
+
* Offset with an offset > 0 can be transformed into an empty
|
92
|
+
relation, since at most there can be only one match.
|
101
93
|
|
102
|
-
*
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
*
|
110
|
-
|
94
|
+
* Connective
|
95
|
+
* "attr > ? OR attr > ?" -> "attr > ?", with the least restrictive value
|
96
|
+
* Do the same for >=, <, <=
|
97
|
+
* "attr > ? AND attr > ?" -> "attr > ?", with the most restrictive value
|
98
|
+
* Do the same for >=, <, <=
|
99
|
+
* "attr > 5 OR attr == 5" -> "attr >= 5"
|
100
|
+
* "attr < 5 OR attr == 5" -> "attr <= 5"
|
101
|
+
* "attr" = "string" AND "attr" =~ /string/ -> "attr" = "string"
|
102
|
+
* If the regexp matches the constant, then it should be
|
103
|
+
optimized down to a constant match. If it does not match
|
104
|
+
then it should be optimized to a Contradiction.
|
105
|
+
* Constant folding, eg:
|
106
|
+
"attr1 > attr2 AND attr1 = 5" -> "5 > attr2 AND attr1 = 5"
|
107
|
+
* This will probably only work across Conjunctions.
|
108
|
+
* "attr > 5 AND attr = 6" -> "attr = 6", because attr must be
|
109
|
+
equal to 6. this will probably be related to constant folding;
|
110
|
+
the first expression will become 6 > 5, which evaluates to a
|
111
|
+
Tautology, then the expression is a Tautology AND attr = 6,
|
112
|
+
which simplifies down to attr = 6.
|
113
|
+
* "attr < 5 AND attr = 6" -> "Contradiction", because attr must be equal to
|
114
|
+
6, and 6 < 5 evaluates to a Contradiction. A Contradiction AND attr = 6
|
115
|
+
simplifies down to a Contradiction.
|
111
116
|
|
112
|
-
*
|
113
|
-
|
114
|
-
perhaps an exception can be made if there is a functional dependency
|
115
|
-
between the columns projected away and the one remaining. Then I *think*
|
116
|
-
it might still work, but more research will be needed.
|
117
|
+
* Inclusion/Exclusion
|
118
|
+
* When the enumerable is a contiguous sequence transform it into a Range
|
data/config/flay.yml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
---
|
2
|
-
threshold:
|
3
|
-
total_score:
|
2
|
+
threshold: 120
|
3
|
+
total_score: 942
|
data/config/flog.yml
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
---
|
2
|
-
threshold:
|
2
|
+
threshold: 19.9
|
data/config/roodi.yml
CHANGED
@@ -5,11 +5,11 @@ CaseMissingElseCheck: { }
|
|
5
5
|
ClassLineCountCheck: { line_count: 396 }
|
6
6
|
ClassNameCheck: { pattern: !ruby/regexp /\A(?:[A-Z]+|[A-Z][a-z](?:[A-Z]?[a-z])+)\z/ }
|
7
7
|
ClassVariableCheck: { }
|
8
|
-
CyclomaticComplexityBlockCheck: { complexity:
|
8
|
+
CyclomaticComplexityBlockCheck: { complexity: 5 }
|
9
9
|
CyclomaticComplexityMethodCheck: { complexity: 6 }
|
10
10
|
EmptyRescueBodyCheck: { }
|
11
11
|
ForLoopCheck: { }
|
12
|
-
MethodLineCountCheck: { line_count:
|
12
|
+
MethodLineCountCheck: { line_count: 11 }
|
13
13
|
MethodNameCheck: { pattern: !ruby/regexp /\A(?:[a-z\d](?:_?[a-z\d])+[?!=]?|\[\]=?|==|<=>|[+*&|-])\z/ }
|
14
14
|
ModuleLineCountCheck: { line_count: 398 }
|
15
15
|
ModuleNameCheck: { pattern: !ruby/regexp /\A(?:[A-Z]+|[A-Z][a-z](?:[A-Z]?[a-z])+)\z/ }
|
data/config/site.reek
CHANGED
@@ -8,10 +8,10 @@ UncommunicativeParameterName:
|
|
8
8
|
- !ruby/regexp /[0-9]$/
|
9
9
|
- !ruby/regexp /[A-Z]/
|
10
10
|
LargeClass:
|
11
|
-
max_methods:
|
11
|
+
max_methods: 14 # TODO: decrease max_methods to 10-15 or less
|
12
12
|
exclude: []
|
13
13
|
enabled: true
|
14
|
-
max_instance_variables:
|
14
|
+
max_instance_variables: 5
|
15
15
|
UncommunicativeMethodName:
|
16
16
|
accept: []
|
17
17
|
exclude: []
|
@@ -7,6 +7,30 @@ module Veritas
|
|
7
7
|
# Abstract base class representing Join optimizations
|
8
8
|
class Join < Relation::Operation::Combination
|
9
9
|
|
10
|
+
CONTRADICTION = Veritas::Function::Proposition::Contradiction.instance
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
# Return the key to join the operations with
|
15
|
+
#
|
16
|
+
# @return [Header]
|
17
|
+
#
|
18
|
+
# @todo find a minimal key from the header
|
19
|
+
#
|
20
|
+
# @api private
|
21
|
+
def join_key
|
22
|
+
operation.join_header
|
23
|
+
end
|
24
|
+
|
25
|
+
# Return a predicate that matches every tuple in the materialized operand
|
26
|
+
#
|
27
|
+
# @return [Function]
|
28
|
+
#
|
29
|
+
# @api private
|
30
|
+
def materialized_predicate
|
31
|
+
matching_projection.reduce(CONTRADICTION) { |predicate, tuple| predicate.or(tuple.predicate) }.optimize
|
32
|
+
end
|
33
|
+
|
10
34
|
# Optimize when operands' headers are equal
|
11
35
|
class EqualHeaders < self
|
12
36
|
|
@@ -30,16 +54,108 @@ module Veritas
|
|
30
54
|
|
31
55
|
end # class EqualHeaders
|
32
56
|
|
57
|
+
# Optimize when the left operand is materialized
|
58
|
+
class LeftMaterializedOperand < self
|
59
|
+
|
60
|
+
# Test if the left operand is materialized
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
#
|
64
|
+
# @api private
|
65
|
+
def optimizable?
|
66
|
+
left.materialized? && !right_matching_left?
|
67
|
+
end
|
68
|
+
|
69
|
+
# Return the join of the left and right with the right restricted
|
70
|
+
#
|
71
|
+
# @return [Algebra::Join]
|
72
|
+
#
|
73
|
+
# @api private
|
74
|
+
def optimize
|
75
|
+
operation.class.new(left, right.restrict { materialized_predicate })
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
# Test if the right operand is a restriction matching the left
|
81
|
+
#
|
82
|
+
# @return [Boolean]
|
83
|
+
#
|
84
|
+
# @api private
|
85
|
+
def right_matching_left?
|
86
|
+
right = self.right
|
87
|
+
right.kind_of?(Veritas::Algebra::Restriction) && right.predicate.eql?(materialized_predicate)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Return the matching projection of the materialized relation
|
91
|
+
#
|
92
|
+
# @return [Projection]
|
93
|
+
#
|
94
|
+
# @api private
|
95
|
+
def matching_projection
|
96
|
+
left.project(join_key)
|
97
|
+
end
|
98
|
+
|
99
|
+
end # class LeftMaterializedOperand
|
100
|
+
|
101
|
+
# Optimize when the right operand is materialized
|
102
|
+
class RightMaterializedOperand < self
|
103
|
+
|
104
|
+
# Test if the right operand is materialized
|
105
|
+
#
|
106
|
+
# @return [Boolean]
|
107
|
+
#
|
108
|
+
# @api private
|
109
|
+
def optimizable?
|
110
|
+
right.materialized? && !left_matching_right?
|
111
|
+
end
|
112
|
+
|
113
|
+
# Return the join of the left and right with the left restricted
|
114
|
+
#
|
115
|
+
# @return [Algebra::Join]
|
116
|
+
#
|
117
|
+
# @api private
|
118
|
+
def optimize
|
119
|
+
operation.class.new(left.restrict { materialized_predicate }, right)
|
120
|
+
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
# Test if the left operand is a restriction matching the right
|
125
|
+
#
|
126
|
+
# @return [Boolean]
|
127
|
+
#
|
128
|
+
# @api private
|
129
|
+
def left_matching_right?
|
130
|
+
left = self.left
|
131
|
+
left.kind_of?(Veritas::Algebra::Restriction) && left.predicate.eql?(materialized_predicate)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Return the matching projection of the materialized relation
|
135
|
+
#
|
136
|
+
# @return [Projection]
|
137
|
+
#
|
138
|
+
# @api private
|
139
|
+
def matching_projection
|
140
|
+
right.project(join_key)
|
141
|
+
end
|
142
|
+
|
143
|
+
end # class RightMaterializedOperand
|
144
|
+
|
33
145
|
Veritas::Algebra::Join.optimizer = chain(
|
34
146
|
EmptyLeft,
|
35
147
|
EmptyRight,
|
36
148
|
EqualHeaders,
|
37
149
|
LeftOrderOperand,
|
38
150
|
RightOrderOperand,
|
39
|
-
|
151
|
+
MaterializedOperands,
|
152
|
+
LeftMaterializedOperand,
|
153
|
+
RightMaterializedOperand,
|
40
154
|
UnoptimizedOperands
|
41
155
|
)
|
42
156
|
|
157
|
+
memoize :materialized_predicate
|
158
|
+
|
43
159
|
end # class Join
|
44
160
|
end # module Algebra
|
45
161
|
end # class Optimizer
|
@@ -258,7 +258,18 @@ module Veritas
|
|
258
258
|
#
|
259
259
|
# @api private
|
260
260
|
def optimize
|
261
|
-
operand.class.new(wrap_operand,
|
261
|
+
operand.class.new(wrap_operand, directions)
|
262
|
+
end
|
263
|
+
|
264
|
+
private
|
265
|
+
|
266
|
+
# Return the renamed directions
|
267
|
+
#
|
268
|
+
# @return [Relation::Operation::Order::DirectionSet]
|
269
|
+
#
|
270
|
+
# @api private
|
271
|
+
def directions
|
272
|
+
operand.directions.rename(aliases)
|
262
273
|
end
|
263
274
|
|
264
275
|
end # class OrderOperand
|