activecube 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.idea/workspace.xml +114 -0
  4. data/.rspec +3 -0
  5. data/.travis.yml +7 -0
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +60 -0
  10. data/Rakefile +6 -0
  11. data/activecube.gemspec +32 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/lib/activecube.rb +6 -0
  15. data/lib/activecube/active_record_extension.rb +20 -0
  16. data/lib/activecube/base.rb +6 -0
  17. data/lib/activecube/clickhouse/metric/count.rb +10 -0
  18. data/lib/activecube/clickhouse/metric/sum.rb +11 -0
  19. data/lib/activecube/cube_definition.rb +62 -0
  20. data/lib/activecube/dimension.rb +5 -0
  21. data/lib/activecube/dimension_definition_methods.rb +32 -0
  22. data/lib/activecube/field.rb +12 -0
  23. data/lib/activecube/metric.rb +5 -0
  24. data/lib/activecube/processor/composer.rb +76 -0
  25. data/lib/activecube/processor/index.rb +17 -0
  26. data/lib/activecube/processor/measure_tables.rb +42 -0
  27. data/lib/activecube/processor/optimizer.rb +123 -0
  28. data/lib/activecube/processor/table.rb +54 -0
  29. data/lib/activecube/query/chain_appender.rb +25 -0
  30. data/lib/activecube/query/cube_query.rb +112 -0
  31. data/lib/activecube/query/item.rb +22 -0
  32. data/lib/activecube/query/limit.rb +17 -0
  33. data/lib/activecube/query/measure.rb +40 -0
  34. data/lib/activecube/query/or_selector.rb +20 -0
  35. data/lib/activecube/query/ordering.rb +17 -0
  36. data/lib/activecube/query/selector.rb +84 -0
  37. data/lib/activecube/query/slice.rb +57 -0
  38. data/lib/activecube/query_methods.rb +11 -0
  39. data/lib/activecube/selector.rb +11 -0
  40. data/lib/activecube/version.rb +3 -0
  41. metadata +140 -0
@@ -0,0 +1,5 @@
module Activecube
  # Base class for cube dimension definitions.
  #
  # The class itself carries no behaviour; it only gains the class-level
  # definition readers and macros provided by DimensionDefinitionMethods.
  class Dimension
    extend Activecube::DimensionDefinitionMethods
  end
end
@@ -0,0 +1,32 @@
module Activecube
  # Class-level definition DSL meant to be mixed in with +extend+.
  #
  # The extending class gains public readers for the declared column names,
  # fields and identity column, plus the private macros +column+, +field+
  # and +identity_column+ that are called from inside the class body.
  module DimensionDefinitionMethods
    attr_reader :column_names, :identity, :fields

    # Returns the one and only column declared for this definition.
    #
    # @return [Object] the single declared column name
    # @raise [RuntimeError] when no column, or more than one column, is declared
    def column_name
      # @column_names stays nil until +column+ is called at least once;
      # treat that the same as an empty declaration instead of failing on nil.
      names = column_names || []
      raise "Not defined column for a metric #{self.name}" if names.empty?
      raise "Defined more than one column for a metric #{self.name}" if names.count > 1

      names.first
    end

    private

    # Declares one or more columns (nested arrays are flattened).
    #
    # @raise [DefinitionError] when any of the columns was already declared
    # @return [Array] the accumulated list of column names
    def column(*args)
      declared = (@column_names ||= [])
      incoming = args.flatten
      duplicates = incoming & declared
      unless duplicates.empty?
        raise DefinitionError, "#{duplicates.join(',')} already defined for columns in #{self.name}"
      end

      declared.concat(incoming)
    end

    # Registers a Field built from +args+, keyed by the symbolized first argument.
    def field(*args)
      (@fields ||= {})[args.first.to_sym] = Field.new(*args)
    end

    # Declares the identity column; it may be declared only once.
    #
    # @raise [RuntimeError] when an identity was already declared
    def identity_column(*args)
      raise "Identity already defined as #{identity} for #{self.name}" if @identity

      @identity = args.first
    end
  end
end
@@ -0,0 +1,12 @@
module Activecube
  # A named field of a dimension or metric definition.
  class Field
    attr_reader :name, :definition

    # @param args [Array] the field name followed by an optional definition;
    #   any further arguments are ignored
    def initialize(*args)
      # Plain destructuring instead of ActiveSupport's Array#second, so the
      # class also works when ActiveSupport core extensions are not loaded.
      @name, @definition = args
    end
  end
end
@@ -0,0 +1,5 @@
module Activecube
  # Base class for cube metric definitions.
  #
  # The class itself carries no behaviour; it only gains the class-level
  # definition readers and macros provided by DimensionDefinitionMethods.
  class Metric
    extend Activecube::DimensionDefinitionMethods
  end
end
@@ -0,0 +1,76 @@
module Activecube
  module Processor
    # Turns a cube query into one composed query object.
    #
    # For every measure it collects the tables able to measure it, lets the
    # Optimizer choose one table per measure, builds one query per chosen
    # table and joins those per-table queries together.
    class Composer
      attr_reader :cube_query, :models

      # @param cube_query the cube query to compose
      def initialize(cube_query)
        @cube_query = cube_query
      end

      # Builds the composed query and records the models it touches in #models.
      #
      # @return the composed query, or nil when the cube query has no measures
      # @raise [RuntimeError] when a measure cannot be measured by any table or
      #   the optimizer returns an unusable or more expensive selection
      def build_query
        compose_queries(optimize!(ranked_tables))
      end

      # Returns the single connection shared by all models used by the query.
      # #models is only populated by #build_query.
      #
      # @raise [RuntimeError] when there is no connection or more than one
      def connection
        # models is nil until build_query ran; report that as "no connection"
        used_models = models || []
        connections = used_models.map(&:connection).compact.uniq
        raise "No connection found for query" if connections.empty?
        if connections.count > 1
          raise "Tables #{used_models.map(&:name).join(',')} mapped to multiple connections, can not query"
        end

        connections.first
      end

      private

      # Selects one table per measure using the Optimizer.
      #
      # The optimizer works on a cost matrix with one row per measure and one
      # column per distinct table (nil where the table cannot measure it).
      #
      # @return [Array] the same measure_tables, with +selected+ updated
      def optimize!(measure_tables)
        all_tables = measure_tables.map(&:tables).map(&:keys).flatten.uniq

        cost_matrix = measure_tables.map do |measure_table|
          all_tables.map do |table|
            entry = measure_table.tables[table]
            entry && entry.cost
          end
        end

        before = total_cost(measure_tables)
        Optimizer.new(cost_matrix).optimize.each_with_index do |optimal, index|
          measure_table = measure_tables[index]
          # The optimizer answers with a column of the cost matrix, i.e. an
          # index into all_tables. +selected+ indexes the measure's own entries,
          # which may be a subset of all_tables, so translate between the two.
          chosen = measure_table.tables[all_tables[optimal]]
          position = chosen && measure_table.entries.index(chosen)
          raise "Optimizer selected unusable table #{optimal} for #{cost_matrix}" unless position

          measure_table.selected = position
        end
        after = total_cost(measure_tables)

        raise "Optimizer made it worth #{before} -> #{after} for #{cost_matrix}" unless after <= before

        measure_tables
      end

      # Sum over the selected tables of the highest cost among the measures
      # assigned to each table.
      def total_cost(measure_tables)
        measure_tables.group_by(&:table).values.sum do |group|
          group.map { |measure_table| measure_table.entry.cost }.max
        end
      end

      # Builds one MeasureTables per measure, listing every matching table
      # together with its best applicable index.
      #
      # @raise [RuntimeError] when a measure is not measurable by any table
      def ranked_tables
        candidates = cube_query.cube.tables.select { |table| table.matches?(cube_query, []) }

        cube_query.measures.map do |measure|
          ranked = MeasureTables.new(measure)
          candidates.each do |table|
            next unless table.measures?(measure)

            ranked.add_table(table, best_index(table, measure))
          end
          if ranked.tables.empty?
            raise "Metric #{measure.key} #{measure.definition.name} can not be measured by any of tables #{candidates.map(&:name).join(',')}"
          end

          ranked
        end
      end

      # The applicable index with the highest cardinality, or nil when none
      # applies. An index without a cardinality is ranked as 0.
      def best_index(table, measure)
        table.model.activecube_indexes
             .select { |index| index.indexes?(cube_query, [measure]) }
             .max_by { |index| index.cardinality || 0 }
      end

      # Builds one query per selected table and joins them in iteration order.
      def compose_queries(measure_tables)
        @models = []
        composed_query = nil

        measure_tables.group_by(&:table).each_pair do |table, group|
          @models << table.model
          table_query = table.query(cube_query.reduced(group.map(&:measure)))
          composed_query =
            if composed_query
              table.join(cube_query, composed_query, table_query)
            else
              table_query
            end
        end

        composed_query
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Activecube
  module Processor
    # Describes an index over one or more fields, with an optional cardinality.
    class Index
      attr_reader :fields, :cardinality

      # @param name a field name or a (possibly nested) array of field names
      # @param args optional leading options object read with +[:cardinality]+
      def initialize(name, *args)
        options = args.first
        @fields = [name].flatten
        @cardinality = options && options[:cardinality]
      end

      # True when every indexed field is among the selector columns that the
      # query reports for the given measures.
      def indexes?(query, measures)
        uncovered = fields - query.selector_column_names(measures)
        uncovered.empty?
      end
    end
  end
end
@@ -0,0 +1,42 @@
module Activecube
  module Processor
    # The candidate tables for one measure, plus which candidate is selected.
    class MeasureTables
      # One candidate: a table, the index chosen for it and the derived cost.
      class Entry
        attr_reader :table, :index, :cardinality, :cost

        # @param table the candidate table
        # @param index the index used for the table, or nil when there is none
        def initialize(table, index)
          @table = table
          @index = index
          # A missing index, or an index declared without a cardinality,
          # counts as cardinality 0 (the previous code failed on 1.0 + nil).
          @cardinality = (index && index.cardinality) || 0
          # Higher cardinality gives lower cost; cardinality 0 gives cost 1.0.
          @cost = 1.0 / (1.0 + cardinality)
        end
      end

      attr_reader :measure, :entries, :tables
      attr_accessor :selected

      # @param measure the measure the candidates are collected for
      def initialize(measure)
        @measure = measure
        @tables = {}   # table => Entry
        @entries = []  # entries in insertion order; +selected+ indexes this list
        @selected = 0
      end

      # Registers a candidate table with its index.
      #
      # @return [Entry] the created entry
      def add_table(table, index)
        candidate = Entry.new(table, index)
        entries << candidate
        tables[table] = candidate
      end

      # The table of the currently selected entry.
      def table
        entry.table
      end

      # The currently selected entry.
      def entry
        entries[selected]
      end
    end
  end
end
@@ -0,0 +1,123 @@
module Activecube
  module Processor
    # Chooses one table (matrix column) for every metric (matrix row).
    #
    # +cost_matrix[m][t]+ is the cost of measuring metric +m+ from table +t+;
    # nil means the table is not applicable for that metric. The objective
    # being minimised is the sum, over all tables in use, of the highest cost
    # among the metrics assigned to that table.
    class Optimizer
      # Cost assumed for a table that is not applicable for a metric.
      UNLIM_COST = 9999
      # Upper bound on the number of refinement passes.
      MAX_ITERATIONS = 3

      attr_reader :tables_count, :metrics_count, :cost_matrix

      # @param cost_matrix [Array<Array<Numeric, nil>>] one row per metric
      def initialize(cost_matrix)
        @cost_matrix = cost_matrix
      end

      # @return [Array<Integer>] for every metric, the index of the chosen table
      def optimize
        with_cache do
          @tables_count = cost_matrix.map(&:count).max
          @metrics_count = cost_matrix.count

          tables_count == 1 ? [0] * metrics_count : do_optimize
        end
      end

      private

      # Memoises the result in Rails.cache (keyed by the matrix) when a Rails
      # cache is available; otherwise just computes it.
      def with_cache(&block)
        if defined?(::Rails) && ::Rails.respond_to?(:cache) && ::Rails.cache
          ::Rails.cache.fetch(cost_matrix, expires_in: 12.hours, &block)
        else
          yield
        end
      end

      def do_optimize
        @tables_by_metrics = []

        # order metrics from the lowest to the highest minimal cost
        sort_metrics

        # initial solution: cheapest table per metric
        select_min_cost_by_metric

        # refine the initial solution (at most MAX_ITERATIONS passes)
        iterates

        @tables_by_metrics
      end

      def sort_metrics
        @metrics_index_sorted = (0...metrics_count).sort_by do |m_i|
          cost_matrix[m_i].compact.min || UNLIM_COST
        end
      end

      # Picks, for every metric, the table with the minimal cost. Ties are
      # broken in favour of a table already picked by an earlier metric (the
      # higher that metric's cost the better), then by the lowest table index.
      def select_min_cost_by_metric
        @metrics_index_sorted.each do |m_i|
          @tables_by_metrics[m_i] = (0...tables_count).min_by do |c_i|
            [cost_matrix[m_i][c_i] || UNLIM_COST, reuse_preference(c_i)]
          end
        end
      end

      # Tie-breaker for select_min_cost_by_metric: negative cost of the first
      # metric that already uses table +c_i+, or 0 when the table is unused.
      def reuse_preference(c_i)
        owner = @tables_by_metrics.index(c_i)
        owner ? -(cost_matrix[owner][c_i] || 0) : 0
      end

      # Local search: repeatedly moves a single metric to another applicable
      # table whenever that strictly lowers the total cost. Moves are applied
      # one at a time, so the total cost never increases.
      def iterates
        assignment = @tables_by_metrics.dup

        MAX_ITERATIONS.times do
          moved = false
          @metrics_index_sorted.each do |m_i|
            target = cheaper_table_for(assignment, m_i)
            next unless target

            assignment[m_i] = target
            moved = true
          end
          break unless moved
        end

        @tables_by_metrics = assignment
      end

      # The applicable table that lowers the total cost the most when metric
      # +m_i+ is moved to it, or nil when no move is a strict improvement.
      def cheaper_table_for(assignment, m_i)
        best_total = assignment_cost(assignment)
        best_table = nil

        (0...tables_count).each do |c_n|
          next if c_n == assignment[m_i] || cost_matrix[m_i][c_n].nil?

          candidate = assignment.dup
          candidate[m_i] = c_n
          total = assignment_cost(candidate)
          next unless total < best_total

          best_total = total
          best_table = c_n
        end

        best_table
      end

      # Total cost of an assignment: every table in use contributes the
      # highest cost among the metrics assigned to it.
      def assignment_cost(assignment)
        per_table = {}
        assignment.each_with_index do |c_i, m_i|
          cost = cost_matrix[m_i][c_i] || UNLIM_COST
          per_table[c_i] = cost if per_table[c_i].nil? || cost > per_table[c_i]
        end
        per_table.values.sum
      end
    end
  end
end
@@ -0,0 +1,54 @@
module Activecube
  module Processor
    # Wraps a model and builds queries against its table.
    #
    # The namespace is opened in nested form so the file does not depend on
    # Activecube::Processor having been defined before it is loaded.
    class Table
      attr_reader :model

      # @param model the model class backing this table
      def initialize(model)
        @model = model
      end

      # @return the model's table name
      def name
        model.table_name
      end

      # True when the model has every column the query reports for +measures+.
      def matches?(query, measures = query.measures)
        covers?(query.column_names(measures))
      end

      # True when the model has every column required by +measure+.
      def measures?(measure)
        covers?(measure.required_column_names)
      end

      # Builds the query for this table by letting every slice, measure,
      # selector and option of the cube query append itself, in that order.
      def query(cube_query)
        table = model.arel_table
        parts = cube_query.slices + cube_query.measures + cube_query.selectors + cube_query.options

        parts.inject(table) do |query, part|
          part.append_query(cube_query, table, query)
        end
      end

      # Full-outer-joins two already built queries on the cube query's join
      # fields and re-applies the cube query's options to the joined query.
      def join(cube_query, left_query, right_query)
        outer_table = model.arel_table.class.new('').project(Arel.star)
        dimension_names = cube_query.join_fields

        joined = outer_table
                 .from(left_query)
                 .join(right_query, ::Arel::Nodes::FullOuterJoin)
                 .using(*dimension_names)

        cube_query.options.inject(joined) do |query, option|
          option.append_query(cube_query, outer_table, query)
        end
      end

      private

      # True when none of +column_names+ is missing from the model's attributes.
      def covers?(column_names)
        (column_names - model.attribute_types.keys).empty?
      end
    end
  end
end
@@ -0,0 +1,25 @@
module Activecube
  module Query
    # Shared helper for chainable query builders: resolves heterogeneous
    # arguments into definition objects and appends them to a list.
    module ChainAppender
      private

      # Resolves every argument and appends the results to +list+.
      #
      # Accepted argument forms:
      # * a Symbol - looked up in +definitions+ (when definitions are given)
      # * an instance of +def_class+ - used as is
      # * a Hash of alias => instance of +def_class+ - each value is aliased
      #   with +alias!+ and the result of that call is appended
      #
      # Nothing is appended when any argument is rejected.
      #
      # @param args [Array] the arguments to resolve
      # @param list [Array] the list that receives the resolved definitions
      # @param def_class [Class] the expected definition class
      # @param definitions [#[], nil] lookup of definitions by symbol
      # @return [self] to allow chaining
      # @raise [ArgumentError] for an unsupported argument or an unknown symbol
      def append(*args, list, def_class, definitions)
        resolved = args.map { |arg| resolve_chain_argument(arg, def_class, definitions) }
        list.concat(resolved.flatten)
        self
      end

      # Resolves a single argument of #append.
      def resolve_chain_argument(arg, def_class, definitions)
        if arg.kind_of?(Symbol) && definitions
          definition = definitions[arg]
          # an unknown symbol used to slip a nil into the chain and fail later
          raise ArgumentError, "#{arg} is not defined for #{def_class}" if definition.nil?

          definition
        elsif arg.kind_of?(def_class)
          arg
        elsif arg.kind_of?(Hash)
          arg.map do |key, value|
            unless value.kind_of?(def_class)
              raise ArgumentError, "Unexpected #{value.class.name} to use for #{def_class} as #{arg}[#{key}]"
            end

            value.alias!(key)
          end
        else
          raise ArgumentError, "Unexpected #{arg.class} to use for #{def_class} as #{arg}"
        end
      end
    end
  end
end
@@ -0,0 +1,112 @@
module Activecube::Query
  # Chainable description of a query against a cube: slices, measures,
  # selectors and options (ordering / limits), plus the entry points that
  # turn the description into a query through Activecube::Processor::Composer.
  class CubeQuery
    include ChainAppender

    attr_reader :cube, :slices, :measures, :selectors, :orderings, :options

    # @param cube the cube being queried
    # @param slices [Array] initial slices
    # @param measures [Array] initial measures
    # @param selectors [Array] initial selectors
    # @param options [Array] initial options (orderings and limits)
    def initialize(cube, slices = [], measures = [], selectors = [], options = [])
      @cube = cube
      @slices = slices
      @measures = measures
      @selectors = selectors
      @options = options
    end

    # Adds slices, resolving symbols through the cube's dimensions.
    # @return [self]
    def slice(*args)
      append(*args, @slices, Slice, cube.dimensions)
    end

    # Adds measures, resolving symbols through the cube's metrics.
    # @return [self]
    def measure(*args)
      append(*args, @measures, Measure, cube.metrics)
    end

    # Adds selectors, resolving symbols through the cube's selectors.
    # @return [self]
    def select(*args)
      append(*args, @selectors, Selector, cube.selectors)
    end

    # Adds a descending Ordering option for every argument.
    # @return [self]
    def desc(*args)
      add_options(Ordering, args, :desc)
    end

    # Adds an ascending Ordering option for every argument.
    # @return [self]
    def asc(*args)
      add_options(Ordering, args, :asc)
    end

    # Adds a :skip Limit option for every argument.
    # @return [self]
    def skip(*args)
      add_options(Limit, args, :skip)
    end

    # Adds a :take Limit option for every argument.
    # @return [self]
    def take(*args)
      add_options(Limit, args, :take)
    end

    # Composes the query, renders it to SQL and executes it on the
    # connection resolved by the composer.
    def query
      composer = Activecube::Processor::Composer.new(self)
      sql = composer.build_query.to_sql
      composer.connection.exec_query(sql)
    end

    # @return the composed (not executed) query object
    def to_query
      Activecube::Processor::Composer.new(self).build_query
    end

    # @return the SQL of the composed query
    def to_sql
      to_query.to_sql
    end

    # Distinct column names required by the given measures together with
    # this query's slices and selectors.
    def column_names(measures = self.measures)
      (measures + slices + selectors).map(&:required_column_names).flatten.uniq
    end

    # Distinct column names required by the selectors of the given measures
    # together with this query's own selectors.
    def selector_column_names(measures = self.measures)
      (measures.map(&:selectors) + selectors).flatten.map(&:required_column_names).flatten.uniq
    end

    # Derives the query to run for a subset of measures.
    #
    # Selectors shared by all of the given measures are lifted from the
    # measures to the query level. When every remaining measure still has
    # selectors of its own, an OrSelector over all of them is added as well.
    #
    # @param other_measures [Array] the measures the derived query is for
    # @return [CubeQuery] self when nothing changes, otherwise a new query
    #   that shares this query's cube, slices and options
    def reduced(other_measures)
      common_selectors = other_measures.map(&:selectors).inject { |common, current| common & current } || []

      if common_selectors.empty?
        reduced_measures = other_measures
        reduced_selectors = selectors
      else
        reduced_measures = other_measures.map do |m|
          Measure.new(m.cube, m.key, m.definition, m.selectors - common_selectors)
        end
        reduced_selectors = selectors + common_selectors
      end

      # An OrSelector over an empty measure list would carry no alternatives.
      if !reduced_measures.empty? && reduced_measures.none? { |rm| rm.selectors.empty? }
        reduced_selectors += [OrSelector.new(reduced_measures.map(&:selectors).flatten.uniq)]
      end

      return self if (reduced_measures == measures) && (reduced_selectors == selectors)

      # Carry the options over: without them the derived query silently
      # loses the ordering and limits requested on this query.
      CubeQuery.new(cube, slices, reduced_measures, reduced_selectors, options)
    end

    # Distinct fields used to join per-table queries: the identity of each
    # slice's dimension class when it has one, otherwise the slice key.
    def join_fields
      slices.map { |s| s.dimension_class.identity || s.key }.uniq
    end

    private

    # Appends one +option_class+ instance of the given kind per argument.
    def add_options(option_class, args, kind)
      args.each { |arg| options << option_class.new(arg, kind) }
      self
    end
  end
end