veritas-optimizer 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. data/.travis.yml +4 -2
  2. data/Gemfile +8 -9
  3. data/Guardfile +3 -2
  4. data/README.rdoc +55 -0
  5. data/Rakefile +2 -2
  6. data/TODO +100 -98
  7. data/config/flay.yml +2 -2
  8. data/config/flog.yml +1 -1
  9. data/config/roodi.yml +2 -2
  10. data/config/site.reek +2 -2
  11. data/lib/veritas/optimizer/algebra/difference.rb +1 -1
  12. data/lib/veritas/optimizer/algebra/intersection.rb +1 -1
  13. data/lib/veritas/optimizer/algebra/join.rb +117 -1
  14. data/lib/veritas/optimizer/algebra/product.rb +1 -1
  15. data/lib/veritas/optimizer/algebra/rename.rb +12 -1
  16. data/lib/veritas/optimizer/algebra/restriction.rb +148 -0
  17. data/lib/veritas/optimizer/algebra/union.rb +1 -1
  18. data/lib/veritas/optimizer/function/connective/binary.rb +61 -8
  19. data/lib/veritas/optimizer/function/connective/conjunction.rb +1 -1
  20. data/lib/veritas/optimizer/function/connective/disjunction.rb +1 -1
  21. data/lib/veritas/optimizer/function/predicate/comparable.rb +4 -4
  22. data/lib/veritas/optimizer/relation/operation/binary.rb +1 -1
  23. data/lib/veritas/optimizer/support/predicate_partition.rb +182 -0
  24. data/lib/veritas/optimizer/version.rb +1 -1
  25. data/lib/veritas/optimizer.rb +2 -0
  26. data/spec/integration/veritas/algebra/rename/optimize_spec.rb +30 -30
  27. data/spec/integration/veritas/algebra/restriction/optimize_spec.rb +3 -3
  28. data/spec/integration/veritas/relation/operation/limit/optimize_spec.rb +1 -1
  29. data/spec/integration/veritas/relation/operation/offset/optimize_spec.rb +1 -1
  30. data/spec/integration/veritas/relation/operation/order/optimize_spec.rb +6 -6
  31. data/spec/integration/veritas/relation/operation/reverse/optimize_spec.rb +3 -3
  32. data/spec/spec_helper.rb +2 -1
  33. data/spec/unit/veritas/optimizer/algebra/extension/order_operand/optimizable_spec.rb +1 -1
  34. data/spec/unit/veritas/optimizer/algebra/extension/order_operand/optimize_spec.rb +1 -1
  35. data/spec/unit/veritas/optimizer/algebra/join/left_materialized_operand/optimizable_spec.rb +42 -0
  36. data/spec/unit/veritas/optimizer/algebra/join/left_materialized_operand/optimize_spec.rb +55 -0
  37. data/spec/unit/veritas/optimizer/algebra/join/right_materialized_operand/optimizable_spec.rb +42 -0
  38. data/spec/unit/veritas/optimizer/algebra/join/right_materialized_operand/optimize_spec.rb +55 -0
  39. data/spec/unit/veritas/optimizer/algebra/rename/limit_operand/optimizable_spec.rb +1 -1
  40. data/spec/unit/veritas/optimizer/algebra/rename/limit_operand/optimize_spec.rb +1 -1
  41. data/spec/unit/veritas/optimizer/algebra/rename/offset_operand/optimizable_spec.rb +1 -1
  42. data/spec/unit/veritas/optimizer/algebra/rename/offset_operand/optimize_spec.rb +1 -1
  43. data/spec/unit/veritas/optimizer/algebra/rename/order_operand/optimizable_spec.rb +1 -1
  44. data/spec/unit/veritas/optimizer/algebra/rename/order_operand/optimize_spec.rb +1 -1
  45. data/spec/unit/veritas/optimizer/algebra/rename/reverse_operand/optimizable_spec.rb +1 -1
  46. data/spec/unit/veritas/optimizer/algebra/rename/reverse_operand/optimize_spec.rb +1 -1
  47. data/spec/unit/veritas/optimizer/algebra/restriction/combination_operand/optimizable_spec.rb +41 -0
  48. data/spec/unit/veritas/optimizer/algebra/restriction/combination_operand/optimize_spec.rb +35 -0
  49. data/spec/unit/veritas/optimizer/algebra/restriction/join_operand/optimizable_spec.rb +51 -0
  50. data/spec/unit/veritas/optimizer/algebra/restriction/join_operand/optimize_spec.rb +48 -0
  51. data/spec/unit/veritas/optimizer/algebra/restriction/order_operand/optimizable_spec.rb +1 -1
  52. data/spec/unit/veritas/optimizer/algebra/restriction/order_operand/optimize_spec.rb +1 -1
  53. data/spec/unit/veritas/optimizer/algebra/restriction/product_operand/optimizable_spec.rb +44 -0
  54. data/spec/unit/veritas/optimizer/algebra/restriction/product_operand/optimize_spec.rb +48 -0
  55. data/spec/unit/veritas/optimizer/algebra/restriction/unoptimized_operand/optimize_spec.rb +4 -4
  56. data/spec/unit/veritas/optimizer/algebra/summarization/empty_operand/optimize_spec.rb +1 -1
  57. data/spec/unit/veritas/optimizer/algebra/summarization/empty_summarize_per/optimize_spec.rb +7 -7
  58. data/spec/unit/veritas/optimizer/algebra/summarization/order_operand/optimizable_spec.rb +1 -1
  59. data/spec/unit/veritas/optimizer/algebra/summarization/order_operand/optimize_spec.rb +1 -1
  60. data/spec/unit/veritas/optimizer/function/connective/conjunction/optimizable_to_exclusion/optimizable_spec.rb +38 -6
  61. data/spec/unit/veritas/optimizer/function/connective/conjunction/optimizable_to_exclusion/optimize_spec.rb +44 -6
  62. data/spec/unit/veritas/optimizer/function/connective/disjunction/optimizable_to_inclusion/optimizable_spec.rb +38 -6
  63. data/spec/unit/veritas/optimizer/function/connective/disjunction/optimizable_to_inclusion/optimize_spec.rb +44 -6
  64. data/spec/unit/veritas/optimizer/predicate_partition/left_spec.rb +149 -0
  65. data/spec/unit/veritas/optimizer/predicate_partition/remainder_spec.rb +149 -0
  66. data/spec/unit/veritas/optimizer/predicate_partition/right_spec.rb +149 -0
  67. data/spec/unit/veritas/optimizer/relation/operation/binary/left_order_operand/optimizable_spec.rb +1 -1
  68. data/spec/unit/veritas/optimizer/relation/operation/binary/left_order_operand/optimize_spec.rb +1 -1
  69. data/spec/unit/veritas/optimizer/relation/operation/binary/{materialized_operand → materialized_operands}/optimizable_spec.rb +1 -1
  70. data/spec/unit/veritas/optimizer/relation/operation/binary/{materialized_operand → materialized_operands}/optimize_spec.rb +1 -1
  71. data/spec/unit/veritas/optimizer/relation/operation/binary/right_order_operand/optimizable_spec.rb +1 -1
  72. data/spec/unit/veritas/optimizer/relation/operation/binary/right_order_operand/optimize_spec.rb +1 -1
  73. data/spec/unit/veritas/optimizer/relation/operation/limit/equal_limit_operand/optimizable_spec.rb +3 -3
  74. data/spec/unit/veritas/optimizer/relation/operation/limit/equal_limit_operand/optimize_spec.rb +4 -4
  75. data/spec/unit/veritas/optimizer/relation/operation/limit/limit_operand/optimizable_spec.rb +3 -3
  76. data/spec/unit/veritas/optimizer/relation/operation/limit/limit_operand/optimize_spec.rb +3 -3
  77. data/spec/unit/veritas/optimizer/relation/operation/limit/unoptimized_operand/optimizable_spec.rb +4 -4
  78. data/spec/unit/veritas/optimizer/relation/operation/limit/unoptimized_operand/optimize_spec.rb +4 -4
  79. data/spec/unit/veritas/optimizer/relation/operation/limit/zero_limit/optimizable_spec.rb +3 -3
  80. data/spec/unit/veritas/optimizer/relation/operation/limit/zero_limit/optimize_spec.rb +4 -4
  81. data/spec/unit/veritas/optimizer/relation/operation/offset/offset_operand/optimizable_spec.rb +3 -3
  82. data/spec/unit/veritas/optimizer/relation/operation/offset/offset_operand/optimize_spec.rb +3 -3
  83. data/spec/unit/veritas/optimizer/relation/operation/offset/unoptimized_operand/optimizable_spec.rb +4 -4
  84. data/spec/unit/veritas/optimizer/relation/operation/offset/unoptimized_operand/optimize_spec.rb +4 -4
  85. data/spec/unit/veritas/optimizer/relation/operation/offset/zero_offset/optimizable_spec.rb +3 -3
  86. data/spec/unit/veritas/optimizer/relation/operation/offset/zero_offset/optimize_spec.rb +3 -3
  87. data/spec/unit/veritas/optimizer/relation/operation/order/one_limit_operand/optimizable_spec.rb +3 -3
  88. data/spec/unit/veritas/optimizer/relation/operation/order/one_limit_operand/optimize_spec.rb +2 -2
  89. data/spec/unit/veritas/optimizer/relation/operation/order/order_operand/optimizable_spec.rb +2 -2
  90. data/spec/unit/veritas/optimizer/relation/operation/order/order_operand/optimize_spec.rb +2 -2
  91. data/spec/unit/veritas/optimizer/relation/operation/order/unoptimized_operand/optimizable_spec.rb +1 -1
  92. data/spec/unit/veritas/optimizer/relation/operation/order/unoptimized_operand/optimize_spec.rb +1 -1
  93. data/spec/unit/veritas/optimizer/relation/operation/reverse/order_operand/optimizable_spec.rb +2 -2
  94. data/spec/unit/veritas/optimizer/relation/operation/reverse/order_operand/optimize_spec.rb +1 -1
  95. data/spec/unit/veritas/optimizer/relation/operation/reverse/reverse_operand/optimizable_spec.rb +3 -3
  96. data/spec/unit/veritas/optimizer/relation/operation/reverse/reverse_operand/optimize_spec.rb +1 -1
  97. data/spec/unit/veritas/optimizer/relation/operation/reverse/unoptimized_operand/optimizable_spec.rb +3 -3
  98. data/spec/unit/veritas/optimizer/relation/operation/reverse/unoptimized_operand/optimize_spec.rb +1 -1
  99. data/spec/unit/veritas/optimizer/relation/operation/unary/order_operand/optimizable_spec.rb +1 -1
  100. data/spec/unit/veritas/optimizer/relation/operation/unary/order_operand/optimize_spec.rb +1 -1
  101. data/tasks/metrics/heckle.rake +1 -0
  102. data/veritas-optimizer.gemspec +33 -19
  103. metadata +37 -23
data/.travis.yml CHANGED
@@ -1,8 +1,10 @@
1
1
  bundler_args: --without guard metrics
2
2
  script: "bundle exec rake spec"
3
3
  rvm:
4
- - ree
5
4
  - 1.8.7
6
5
  - 1.9.2
7
- - ruby-head
8
6
  - rbx
7
+ # - rbx-2.0
8
+ - ree
9
+ - jruby
10
+ - ruby-head
data/Gemfile CHANGED
@@ -2,22 +2,21 @@
2
2
 
3
3
  source :rubygems
4
4
 
5
- gem 'veritas', '0.0.4', :git => 'git://github.com/dkubb/veritas.git'
5
+ gem 'veritas', '~> 0.0.5', :git => 'git://github.com/dkubb/veritas.git'
6
6
 
7
7
  group :development do
8
- gem 'backports', '~> 2.2.1'
9
- gem 'jeweler', '~> 1.6.0'
10
- gem 'rake', '~> 0.9.0'
8
+ gem 'backports', '~> 2.3.0'
9
+ gem 'jeweler', '~> 1.6.4'
10
+ gem 'rake', '~> 0.9.2'
11
11
  gem 'rspec', '~> 1.3.2'
12
- gem 'yard', '~> 0.7.1'
12
+ gem 'yard', '~> 0.7.2'
13
13
  end
14
14
 
15
15
  group :guard do
16
- gem 'growl', '~> 1.0.3'
17
- gem 'guard', '~> 0.3.4'
16
+ gem 'guard', '~> 0.5.1'
18
17
  gem 'guard-bundler', '~> 0.1.3'
19
18
  gem 'guard-ego', '~> 0.0.1'
20
- gem 'guard-rspec', '~> 0.3.1'
19
+ gem 'guard-rspec', '~> 0.4.0'
21
20
  end
22
21
 
23
22
  platform :jruby do
@@ -31,7 +30,7 @@ platforms :mri_18 do
31
30
  gem 'flay', '~> 1.4.2'
32
31
  gem 'flog', '~> 2.5.1'
33
32
  gem 'heckle', '~> 1.4.3'
34
- gem 'json', '~> 1.5.1'
33
+ gem 'json', '~> 1.5.3'
35
34
  gem 'metric_fu', '~> 2.1.1'
36
35
  gem 'mspec', '~> 1.5.17'
37
36
  gem 'rcov', '~> 0.9.9'
data/Guardfile CHANGED
@@ -14,8 +14,9 @@ guard 'rspec' do
14
14
  watch(%r{\Aspec/(?:lib|support|shared)/.+\.rb\z}) { 'spec' }
15
15
 
16
16
  # run unit specs if associated lib code is modified
17
- watch(%r{\Alib/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}"] }
18
- watch("lib/#{File.basename(File.expand_path('../', __FILE__))}.rb") { 'spec' }
17
+ watch(%r{\Alib/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}"] }
18
+ watch(%r{\Alib/(veritas/optimizer)/support/(.+)\.rb\z}) { |m| Dir["spec/unit/#{m[1]}/#{m[2]}"] }
19
+ watch("lib/#{File.basename(File.expand_path('../', __FILE__))}.rb") { 'spec' }
19
20
 
20
21
  # run a spec if it is modified
21
22
  watch(%r{\Aspec/(?:unit|integration)/.+_spec\.rb\z})
data/README.rdoc CHANGED
@@ -4,6 +4,61 @@ Relational algebra optimizer
4
4
 
5
5
  http://travis-ci.org/dkubb/veritas-optimizer.png
6
6
 
7
+ == Usage
8
+
9
+ # optimize a relation
10
+ new_relation = relation.optimize
11
+ new_relation = relation.optimize(optimizer)
12
+
13
+ # optimize a scalar function
14
+ new_function = function.optimize
15
+ new_function = function.optimize(optimizer)
16
+
17
+ # optimize an aggregate function
18
+ new_aggregate = aggregate.optimize
19
+ new_aggregate = aggregate.optimize(optimizer)
20
+
21
+ == Description
22
+
23
+ The purpose of this gem is to provide a simple API that can be used to optimize a veritas[https://github.com/dkubb/veritas] relation, scalar or aggregate function. The #optimize method accepts an optional optimizer and returns an equivalent but simplified version of the object.
24
+
25
+ One of the primary benefits of Relational Algebra is that it's based on logic, and the rules for simplifying logic are well known and studied. An optimizer can pass through user-generated objects and typically find ways to simplify or organize them in a way that will be more efficient when the operation is executed.
26
+
27
+ The goal is not to replace the advanced optimizers that are inside most databases and datastores, but to augment them with some simple optimizations that make the user provided query easier for the datastore to accept. On the ruby side we have knowledge about intent and can perform semantic optimization that the datastore otherwise would not be able to perform. In many cases we have richer constraints and data than many datastores and we can use that information to simplify and possibly short-circuit queries that could otherwise never return valid results.
28
+
29
+ With the ability to provide custom optimizers we can even target output to a structure optimized for specific datastores. All operations in relational algebra can be transformed into other equivalent operations, ones that are more efficient for the target datastore to execute. The built-in optimizers included in this gem are only a starting point; the intention is to expand them as well as help others create custom optimizers that are optimized for each datastore.
30
+
31
+ == Design
32
+
33
+ The contract for an optimizer instance is simple:
34
+
35
+ a) it must respond to #call, and accept an optimizable object as its only argument
36
+ b) it must return an equivalent object
37
+ c) it must return the exact object when it cannot perform further optimizations
38
+
39
+ The optimizer can perform whatever logic it wishes on the object or any of its contained objects as far down the tree as it likes as long as the requirements for (b) and (c) are met.
40
+
41
+ Inside this gem we have the concept of an optimizer chain. It's a chain of responsibility, which means it's a set of objects chained together to form a pipeline. The object is passed into the head of the pipeline and is either matched and returned by one of the optimizers, or it is already fully optimized and passes through to the end of the chain and is returned as-is. This chain organization has proven to be extremely effective, and it is trivial to re-order or add new optimizers into the middle of the chain as needed. Further work will be made to provide APIs to make this even simpler.
42
+
43
+ Here is an example of an optimizer chain for the restriction operator (think WHERE clause in SQL):
44
+
45
+ Veritas::Algebra::Restriction.optimizer = chain(
46
+ Tautology, # does the predicate match everything?
47
+ Contradiction, # does the predicate match nothing?
48
+ RestrictionOperand, # does the restriction contain another restriction?
49
+ SetOperand, # does the restriction contain a set operation?
50
+ OrderOperand, # does the restriction contain an order?
51
+ EmptyOperand, # does the restriction contain an empty relation?
52
+ MaterializedOperand, # does the restriction contain a materialized relation?
53
+ UnoptimizedOperand # does the restriction contain an unoptimized relation?
54
+ )
55
+
56
+ The restriction operator enters this pipeline, and tests are made at each stage. If the test returns true, then an optimization is performed. Usually the goal is to eliminate work performed by the system or collapse the tree of operations down into something simpler. More aggressive optimizations are usually checked first because we would like to prune as much of the tree as possible up-front. In this case, the first test checks to see if the restriction matches everything, in which case it's pretty much a no-op, and we can drop it altogether. If that's not the case, we then test to see if it matches nothing; if so, we can return an empty relation. Then we test if it contains another restriction, in which case we can "AND" the predicates for both restrictions together and return a single restriction operation. And so on down the list with the first match winning.
57
+
58
+ We always perform at least two optimization passes on each object, because once a tree has been simplified there could be further optimizations possible. Essentially we keep passing the objects into their corresponding optimizer chains until they pass through unchanged. This may seem rather expensive, and I guess it is, but optimization is very fast. Also it doesn't appear to affect performance much in practice due to our convention of testing for the most aggressive optimizations first; often it results in something that is completely optimized on the first try.
59
+
60
+ Once the optimization passes are finished, and no further optimization is possible, the result of an #optimize call is memoized. Further calls to #optimize will always return the same object.
61
+
7
62
  == Note on Patches/Pull Requests
8
63
 
9
64
  * If you want your code merged into the mainline, please discuss
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake'
5
5
  require File.expand_path('../lib/veritas/optimizer/version', __FILE__)
6
6
 
7
7
  begin
8
- gem('jeweler', '~> 1.6.0') if respond_to?(:gem, true)
8
+ gem('jeweler', '~> 1.6.2') if respond_to?(:gem, true)
9
9
  require 'jeweler'
10
10
 
11
11
  Jeweler::Tasks.new do |gem|
@@ -23,5 +23,5 @@ begin
23
23
 
24
24
  FileList['tasks/**/*.rake'].each { |task| import task }
25
25
  rescue LoadError
26
- puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler -v 1.6.0'
26
+ puts 'Jeweler (or a dependency) not available. Install it with: gem install jeweler -v 1.6.2'
27
27
  end
data/TODO CHANGED
@@ -1,25 +1,56 @@
1
- * All relations, even optimized ones, need to hold a reference to the
2
- original (possibly optimized) operations.
3
- * Especially "empty" relations should hold a reference to the original
4
- relation so that insert/update can be performed on them if they are
5
- empty not because they are valid, but because the source object is empty
6
- * Materialized relations will need references to the original objects.
1
+ * Pull up Rename rather than pushing it down
2
+ * Currently a Rename applies to every tuple in a result-set, and
3
+ doing the rename prematurely means that lots of tuples that would
4
+ otherwise be filtered out are going to be processed. A better
5
+ approach should be to push the rename up, dropping renames that
6
+ are projected away. It should not distribute over Binary ops
7
+ unless the other side of the binary op has the same aliases.
7
8
 
8
- * Optimization can replace one or more AST nodes with a simpler/more efficient
9
- structure as long as it is lossless. Just changing it to empty or
10
- materialized, the way things are now, we lose what the original relation
11
- was so it becomes impossible to insert/update/delete from those relations.
12
- * A write to a materialized object should first propagate the write down
13
- to the @operand, allowing it to raise exceptions if it's read*only,
14
- then it should make the change in the materialized object.
9
+ * For Binary operations, when the left or right is materialized then
10
+ change the other relation to use a restriction to filter the records.
11
+ * This will help optimize the fetching of the underlying records.
12
+
13
+ * Convert all the direct Veritas class usage to use the corresponding
14
+ methods. So for example instead of Algebra::Join.new(left, right) I
15
+ would want left.join(right).
16
+ * The reason for this is that as the Relation Gateway is created
17
+ it will have operands that will not be directly joinable without
18
+ gateway specific processing. By using the method directly I give the
19
+ chance for the gateway to step in and perform other work before
20
+ delegating up to the built-in operators in veritas.
21
+
22
+ * More optimizations:
23
+ * Union
24
+ * A Union of relations with the same base, header, and restrictions should
25
+ try to combine into a single relation with the restrictions using OR.
26
+
27
+ * Intersection
28
+ * An Intersection of relations with the same base, header, and restrictions
29
+ should try to combine into a single relation with the restrictions using
30
+ AND.
31
+ * Use the Join::RightMaterializedOperand and Join::LeftMaterializedOperand
32
+ optimizers to simplify Intersection operations
33
+
34
+ * Difference
35
+ * A Difference of relations with the same base and restrictions should
36
+ try to combine into a single relation with the restrictions using NOT.
37
+
38
+ * Summarization
39
+ * Add OrderSummarizePer for factoring out Order objects inside a Summarization
40
+ * When there are no aggregate functions, drop the Summarization and
41
+ return the summarize_per (?)
42
+ * Use the UnchangedHeader optimizer as a base class
43
+ * When summarize_per is an Order, the Order can be dropped.
15
44
 
16
- * Add further optimizations:
17
45
  * Projection
18
- * When it contains a Rename, if the renamed attributes are removed,
19
- then the rename can be removed.
20
46
  * When it contains a Restriction, if the removed attributes are *not*
21
47
  used in the predicate, then move the Restriction to contain the
22
48
  Projection.
49
+ * When a Projection contains a Restriction, wrap the Projection
50
+ in the Restriction, projecting away any attributes not used in
51
+ the restriction. If there are any remaining attributes, then
52
+ wrap the operation in a Projection removing those attributes.
53
+ * If all attributes are being used in the Restriction do nothing
23
54
  * When renames or extensions or summarizations are projected away and
24
55
  not used in any intermediary operation, then they can be dropped
25
56
  altogether. There's no point in going through all that work to
@@ -27,90 +58,61 @@
27
58
  the renames/extensions/summarizations to not add the attribute, and
28
59
  then call optimize again on the op to potentially remove the op altogether
29
60
  (like in the case of an extension adding one attribute that is removed).
30
- * Operation Order:
31
- * Projection containing an Order
32
- * Should remove the Order, since it is a noop
33
- * Projection should follow Rename
34
- * When a Projection contains a Restriction, wrap the Projection
35
- in the Restriction, projecting away any attributes not used in
36
- the restriction. If there are any remaining attributes, then
37
- wrap the operation in a Projection removing those attributes.
38
- * If all attributes are being used in the Restriction do nothing
39
- * When a Projection contains a Join, wrap the Join with a Projection
40
- of all the headers, minus those used in the Join. If there were
41
- any used, then wrap the whole operation in a Projection with
42
- the remaining attributes.
43
- * If all the attributes are used in the Join, do nothing
44
- * Try to use the same approach for Product
45
- * Test if it's possible to fully distribute projections over
46
- joins rather than splitting it up like this.
47
- * Restriction should follow Projection
48
- * Restriction optimizations:
49
- * "attr > ? OR attr > ?" -> "attr > ?", with the least restrictive value
50
- * Do the same for >=, <, <=
51
- * "attr > ? AND attr > ?" -> "attr > ?", with the most restrictive value
52
- * Do the same for >=, <, <=
53
- * "attr > 5 OR attr == 5" -> "attr >= 5"
54
- * "attr < 5 OR attr == 5" -> "attr <= 5"
55
- * "attr" = "string" AND "attr" =~ /string/ -> "attr" = "string"
56
- * If the regexp matches the constant, then it should be
57
- optimized down to a constant match. If it does not match
58
- then it should be optimized to a Contradiction.
59
- * Constant folding, eg:
60
- "attr1 > attr2 AND attr1 = 5" -> "5 > attr2 AND attr1 = 5"
61
- * This will probably only work across Conjunctions.
62
- * "attr > 5 AND attr = 6" -> "attr = 6", because attr must be
63
- equal to 6. this will probably be related to constant folding;
64
- the first expression will become 6 > 5, which evaluates to a
65
- Tautology, then the expression is a Tautology AND attr = 6,
66
- which simplifies down to attr = 6.
67
- * "attr < 5 AND attr = 6" -> "Contradiction", because attr must be equal to
68
- 6, and 6 < 5 evaluates to a Contradiction. A Contradiction AND attr = 6
69
- simplifies down to a Contradiction.
70
- * Figure out how to reorganize the Restriction predicates so that all
71
- similar operations are closer together to allow more efficient
72
- optimizations. This would allow optimizations of stuff like this:
61
+ * It does not distribute over Intersection or Difference, but see if
62
+ perhaps an exception can be made if there is a functional dependency
63
+ between the columns projected away and the one remaining. Then I *think*
64
+ it might still work, but more research will be needed.
65
+ * When a Projection contains a Join, wrap the Join with a Projection
66
+ of all the headers, minus those used in the Join. If there were
67
+ any used, then wrap the whole operation in a Projection with
68
+ the remaining attributes.
69
+ * If all the attributes are used in the Join, do nothing
70
+ * Try to use the same approach for Product
71
+ * Test if it's possible to fully distribute projections over
72
+ joins rather than splitting it up like this.
73
+
74
+ * Rename
75
+ * When wrapping a Summarization or Extension, and renaming the new attribute,
76
+ it should change the new attribute name, and remove it from the rename.
77
+
78
+ * Restriction
79
+ * Figure out how to reorganize the Restriction predicates so that all
80
+ similar operations are closer together to allow more efficient
81
+ optimizations. This would allow optimizations of stuff like this:
82
+
83
+ "attr1 = ? OR attr2 = ? OR attr1 = ?"
73
84
 
74
- "attr1 = ? OR attr2 = ? OR attr1 = ?"
85
+ Into:
75
86
 
76
- Into:
87
+ "attr1 IN(..) OR attr2 = ?"
77
88
 
78
- "attr1 IN(..) OR attr2 = ?"
79
- * Rename should distribute over Join, Product and Set operations
80
- * The goal should be to push Rename as close to the base tables
81
- as possible so that the names of attribute will be consistent
82
- throughout the whole tree.
83
- * A Union of relations with the same base, header, and restrictions should
84
- try to combine into a single relation with the restrictions using OR.
85
- * An Intersection of relations with the same base, header, and restrictions
86
- should try to combine into a single relation with the restrictions using
87
- AND.
88
- * A Difference of relations with the same base and restrictions should
89
- try to combine into a single relation with the restrictions using NOT.
90
- * Join Optimizations
91
- * When a Join contains a Join, and the size of the base relations is
92
- known, join the smallest with the largest, and then join that result
93
- with the remaining relation.
94
- * Make sure the smallest relation (with a known size) is always the
95
- right-most operation.
96
- * When a restriction uses a unique index:
97
- * Order can be factored out
98
- * Limit with a limit >= 1 can be factored out
99
- * Offset with an offset > 0 can be transformed into an empty
100
- relation, since at most there can be only one match.
89
+ * When it has an equality on a unique attribute:
90
+ * Limit with a limit >= 1 can be factored out
91
+ * Offset with an offset > 0 can be transformed into an empty
92
+ relation, since at most there can be only one match.
101
93
 
102
- * Summarization
103
- * Add OrderSummarizePer for factoring out Order objects inside a Summarization
104
- * When there are no aggregate functions, drop the Summarization and
105
- return the summarize_per (?)
106
- * Use the UnchangedHeader optimizer as a base class
107
- * When summarize_per is TABLE_DEE, and it is wrapped by Order, at most only
108
- one row can be returned, so Order can probably be dropped.
109
- * Consider making relations that represents a 0 or 1 tuple relations
110
- and use them in this case.
94
+ * Connective
95
+ * "attr > ? OR attr > ?" -> "attr > ?", with the least restrictive value
96
+ * Do the same for >=, <, <=
97
+ * "attr > ? AND attr > ?" -> "attr > ?", with the most restrictive value
98
+ * Do the same for >=, <, <=
99
+ * "attr > 5 OR attr == 5" -> "attr >= 5"
100
+ * "attr < 5 OR attr == 5" -> "attr <= 5"
101
+ * "attr" = "string" AND "attr" =~ /string/ -> "attr" = "string"
102
+ * If the regexp matches the constant, then it should be
103
+ optimized down to a constant match. If it does not match
104
+ then it should be optimized to a Contradiction.
105
+ * Constant folding, eg:
106
+ "attr1 > attr2 AND attr1 = 5" -> "5 > attr2 AND attr1 = 5"
107
+ * This will probably only work across Conjunctions.
108
+ * "attr > 5 AND attr = 6" -> "attr = 6", because attr must be
109
+ equal to 6. this will probably be related to constant folding;
110
+ the first expression will become 6 > 5, which evaluates to a
111
+ Tautology, then the expression is a Tautology AND attr = 6,
112
+ which simplifies down to attr = 6.
113
+ * "attr < 5 AND attr = 6" -> "Contradiction", because attr must be equal to
114
+ 6, and 6 < 5 evaluates to a Contradiction. A Contradiction AND attr = 6
115
+ simplifies down to a Contradiction.
111
116
 
112
- * Projection
113
- * It does not distribute over Intersection or Difference, but see if
114
- perhaps an exception can be made if there is a functional dependency
115
- between the columns projected away and the one remaining. Then I *think*
116
- it might still work, but more research will be needed.
117
+ * Inclusion/Exclusion
118
+ * When the enumerable is a contiguous sequence transform it into a Range
data/config/flay.yml CHANGED
@@ -1,3 +1,3 @@
1
1
  ---
2
- threshold: 126
3
- total_score: 870
2
+ threshold: 120
3
+ total_score: 942
data/config/flog.yml CHANGED
@@ -1,2 +1,2 @@
1
1
  ---
2
- threshold: 11.6
2
+ threshold: 19.9
data/config/roodi.yml CHANGED
@@ -5,11 +5,11 @@ CaseMissingElseCheck: { }
5
5
  ClassLineCountCheck: { line_count: 396 }
6
6
  ClassNameCheck: { pattern: !ruby/regexp /\A(?:[A-Z]+|[A-Z][a-z](?:[A-Z]?[a-z])+)\z/ }
7
7
  ClassVariableCheck: { }
8
- CyclomaticComplexityBlockCheck: { complexity: 2 }
8
+ CyclomaticComplexityBlockCheck: { complexity: 5 }
9
9
  CyclomaticComplexityMethodCheck: { complexity: 6 }
10
10
  EmptyRescueBodyCheck: { }
11
11
  ForLoopCheck: { }
12
- MethodLineCountCheck: { line_count: 9 }
12
+ MethodLineCountCheck: { line_count: 11 }
13
13
  MethodNameCheck: { pattern: !ruby/regexp /\A(?:[a-z\d](?:_?[a-z\d])+[?!=]?|\[\]=?|==|<=>|[+*&|-])\z/ }
14
14
  ModuleLineCountCheck: { line_count: 398 }
15
15
  ModuleNameCheck: { pattern: !ruby/regexp /\A(?:[A-Z]+|[A-Z][a-z](?:[A-Z]?[a-z])+)\z/ }
data/config/site.reek CHANGED
@@ -8,10 +8,10 @@ UncommunicativeParameterName:
8
8
  - !ruby/regexp /[0-9]$/
9
9
  - !ruby/regexp /[A-Z]/
10
10
  LargeClass:
11
- max_methods: 11 # TODO: decrease max_methods to 10-15 or less
11
+ max_methods: 14 # TODO: decrease max_methods to 10-15 or less
12
12
  exclude: []
13
13
  enabled: true
14
- max_instance_variables: 2
14
+ max_instance_variables: 5
15
15
  UncommunicativeMethodName:
16
16
  accept: []
17
17
  exclude: []
@@ -58,7 +58,7 @@ module Veritas
58
58
  EmptyRight,
59
59
  LeftOrderOperand,
60
60
  RightOrderOperand,
61
- MaterializedOperand,
61
+ MaterializedOperands,
62
62
  UnoptimizedOperands
63
63
  )
64
64
 
@@ -58,7 +58,7 @@ module Veritas
58
58
  EmptyRight,
59
59
  LeftOrderOperand,
60
60
  RightOrderOperand,
61
- MaterializedOperand,
61
+ MaterializedOperands,
62
62
  UnoptimizedOperands
63
63
  )
64
64
 
@@ -7,6 +7,30 @@ module Veritas
7
7
  # Abstract base class representing Join optimizations
8
8
  class Join < Relation::Operation::Combination
9
9
 
10
+ CONTRADICTION = Veritas::Function::Proposition::Contradiction.instance
11
+
12
+ private
13
+
14
+ # Return the key to join the operations with
15
+ #
16
+ # @return [Header]
17
+ #
18
+ # @todo find a minimal key from the header
19
+ #
20
+ # @api private
21
+ def join_key
22
+ operation.join_header
23
+ end
24
+
25
+ # Return a predicate that matches every tuple in the materialized operand
26
+ #
27
+ # @return [Function]
28
+ #
29
+ # @api private
30
+ def materialized_predicate
31
+ matching_projection.reduce(CONTRADICTION) { |predicate, tuple| predicate.or(tuple.predicate) }.optimize
32
+ end
33
+
10
34
  # Optimize when operands' headers are equal
11
35
  class EqualHeaders < self
12
36
 
@@ -30,16 +54,108 @@ module Veritas
30
54
 
31
55
  end # class EqualHeaders
32
56
 
57
+ # Optimize when the left operand is materialized
58
+ class LeftMaterializedOperand < self
59
+
60
+ # Test if the left operand is materialized
61
+ #
62
+ # @return [Boolean]
63
+ #
64
+ # @api private
65
+ def optimizable?
66
+ left.materialized? && !right_matching_left?
67
+ end
68
+
69
+ # Return the join of the left and right with the right restricted
70
+ #
71
+ # @return [Algebra::Join]
72
+ #
73
+ # @api private
74
+ def optimize
75
+ operation.class.new(left, right.restrict { materialized_predicate })
76
+ end
77
+
78
+ private
79
+
80
+ # Test if the right operand is a restriction matching the left
81
+ #
82
+ # @return [Boolean]
83
+ #
84
+ # @api private
85
+ def right_matching_left?
86
+ right = self.right
87
+ right.kind_of?(Veritas::Algebra::Restriction) && right.predicate.eql?(materialized_predicate)
88
+ end
89
+
90
+ # Return the matching projection of the materialized relation
91
+ #
92
+ # @return [Projection]
93
+ #
94
+ # @api private
95
+ def matching_projection
96
+ left.project(join_key)
97
+ end
98
+
99
+ end # class LeftMaterializedOperand
100
+
101
+ # Optimize when the right operand is materialized
102
+ class RightMaterializedOperand < self
103
+
104
+ # Test if the right operand is materialized
105
+ #
106
+ # @return [Boolean]
107
+ #
108
+ # @api private
109
+ def optimizable?
110
+ right.materialized? && !left_matching_right?
111
+ end
112
+
113
+ # Return the join of the left and right with the left restricted
114
+ #
115
+ # @return [Algebra::Join]
116
+ #
117
+ # @api private
118
+ def optimize
119
+ operation.class.new(left.restrict { materialized_predicate }, right)
120
+ end
121
+
122
+ private
123
+
124
+ # Test if the left operand is a restriction matching the right
125
+ #
126
+ # @return [Boolean]
127
+ #
128
+ # @api private
129
+ def left_matching_right?
130
+ left = self.left
131
+ left.kind_of?(Veritas::Algebra::Restriction) && left.predicate.eql?(materialized_predicate)
132
+ end
133
+
134
+ # Return the matching projection of the materialized relation
135
+ #
136
+ # @return [Projection]
137
+ #
138
+ # @api private
139
+ def matching_projection
140
+ right.project(join_key)
141
+ end
142
+
143
+ end # class RightMaterializedOperand
144
+
33
145
  Veritas::Algebra::Join.optimizer = chain(
34
146
  EmptyLeft,
35
147
  EmptyRight,
36
148
  EqualHeaders,
37
149
  LeftOrderOperand,
38
150
  RightOrderOperand,
39
- MaterializedOperand,
151
+ MaterializedOperands,
152
+ LeftMaterializedOperand,
153
+ RightMaterializedOperand,
40
154
  UnoptimizedOperands
41
155
  )
42
156
 
157
+ memoize :materialized_predicate
158
+
43
159
  end # class Join
44
160
  end # module Algebra
45
161
  end # class Optimizer
@@ -62,7 +62,7 @@ module Veritas
62
62
  EmptyRight,
63
63
  LeftOrderOperand,
64
64
  RightOrderOperand,
65
- MaterializedOperand,
65
+ MaterializedOperands,
66
66
  UnoptimizedOperands
67
67
  )
68
68
 
@@ -258,7 +258,18 @@ module Veritas
258
258
  #
259
259
  # @api private
260
260
  def optimize
261
- operand.class.new(wrap_operand, operation.directions)
261
+ operand.class.new(wrap_operand, directions)
262
+ end
263
+
264
+ private
265
+
266
+ # Return the renamed directions
267
+ #
268
+ # @return [Relation::Operation::Order::DirectionSet]
269
+ #
270
+ # @api private
271
+ def directions
272
+ operand.directions.rename(aliases)
262
273
  end
263
274
 
264
275
  end # class OrderOperand