activecube 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.idea/workspace.xml +114 -0
  4. data/.rspec +3 -0
  5. data/.travis.yml +7 -0
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +60 -0
  10. data/Rakefile +6 -0
  11. data/activecube.gemspec +32 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/lib/activecube.rb +6 -0
  15. data/lib/activecube/active_record_extension.rb +20 -0
  16. data/lib/activecube/base.rb +6 -0
  17. data/lib/activecube/clickhouse/metric/count.rb +10 -0
  18. data/lib/activecube/clickhouse/metric/sum.rb +11 -0
  19. data/lib/activecube/cube_definition.rb +62 -0
  20. data/lib/activecube/dimension.rb +5 -0
  21. data/lib/activecube/dimension_definition_methods.rb +32 -0
  22. data/lib/activecube/field.rb +12 -0
  23. data/lib/activecube/metric.rb +5 -0
  24. data/lib/activecube/processor/composer.rb +76 -0
  25. data/lib/activecube/processor/index.rb +17 -0
  26. data/lib/activecube/processor/measure_tables.rb +42 -0
  27. data/lib/activecube/processor/optimizer.rb +123 -0
  28. data/lib/activecube/processor/table.rb +54 -0
  29. data/lib/activecube/query/chain_appender.rb +25 -0
  30. data/lib/activecube/query/cube_query.rb +112 -0
  31. data/lib/activecube/query/item.rb +22 -0
  32. data/lib/activecube/query/limit.rb +17 -0
  33. data/lib/activecube/query/measure.rb +40 -0
  34. data/lib/activecube/query/or_selector.rb +20 -0
  35. data/lib/activecube/query/ordering.rb +17 -0
  36. data/lib/activecube/query/selector.rb +84 -0
  37. data/lib/activecube/query/slice.rb +57 -0
  38. data/lib/activecube/query_methods.rb +11 -0
  39. data/lib/activecube/selector.rb +11 -0
  40. data/lib/activecube/version.rb +3 -0
  41. metadata +140 -0
@@ -0,0 +1,5 @@
1
module Activecube
  # Base class for cube dimensions.
  #
  # The class itself carries no behaviour; it only gains the class-level
  # definition DSL (+column+, +field+, +identity_column+ and the readers that
  # expose what was declared) by extending DimensionDefinitionMethods.
  class Dimension
    extend DimensionDefinitionMethods
  end
end
@@ -0,0 +1,32 @@
1
module Activecube
  # Class-level definition DSL meant to be mixed in with +extend+ (as done by
  # Dimension and Metric). It records, per extending class, the declared
  # column names, named fields and an optional identity column.
  module DimensionDefinitionMethods

    # column_names - Array of declared columns, or nil when +column+ was never called.
    # identity     - value passed to +identity_column+, or nil.
    # fields       - Hash of Symbol => Field, or nil when +field+ was never called.
    attr_reader :column_names, :identity, :fields

    # Returns the one and only declared column.
    #
    # Raises RuntimeError when no column, or more than one column, is declared.
    # A definition that never called +column+ is treated as "no column" instead
    # of failing with NoMethodError on nil.
    def column_name
      declared = column_names || []
      raise "Not defined column for a metric #{self.name}" if declared.empty?
      raise "Defined more than one column for a metric #{self.name}" if declared.count > 1
      declared.first
    end

    private

    # Declares one or more columns (nested arrays are flattened).
    #
    # Raises DefinitionError when any of the given columns was declared before.
    def column *args
      @column_names ||= []
      added = args.flatten
      duplicates = added & @column_names
      raise DefinitionError, "#{duplicates.join(',')} already defined for columns in #{self.name}" unless duplicates.empty?
      @column_names.concat added
    end

    # Declares a named field; all arguments are forwarded to Field.new and the
    # result is stored under the symbolized first argument.
    def field *args
      @fields ||= {}
      @fields[args.first.to_sym] = Field.new(*args)
    end

    # Declares the identity column. May be called only once per class.
    #
    # Raises RuntimeError when an identity has already been set.
    def identity_column *args
      raise "Identity already defined as #{identity} for #{self.name}" if @identity
      @identity = args.first
    end

  end
end
@@ -0,0 +1,12 @@
1
module Activecube
  # A named field of a dimension or metric definition.
  class Field

    # name       - first constructor argument.
    # definition - second constructor argument, nil when omitted.
    attr_reader :name, :definition

    # Accepts (name, definition = nil); any further arguments are ignored.
    #
    # Plain indexing is used so the class does not rely on the ActiveSupport
    # Array#second core extension being loaded.
    def initialize *args
      @name = args[0]
      @definition = args[1]
    end

  end
end
@@ -0,0 +1,5 @@
1
module Activecube
  # Base class for cube metrics.
  #
  # Like Dimension, it has no behaviour of its own: extending
  # DimensionDefinitionMethods gives subclasses the class-level DSL
  # (+column+, +field+, +identity_column+) and the matching readers.
  class Metric
    extend DimensionDefinitionMethods
  end
end
@@ -0,0 +1,76 @@
1
+ module Activecube::Processor
2
# Turns a cube query into one composed query: finds the tables able to serve
# every measure, lets Optimizer choose one table per measure, builds a query
# per chosen table and joins the per-table queries together.
class Composer

  # cube_query - the query being composed.
  # models     - models of the tables used by the last +build_query+ call
  #              (nil until +build_query+ has run).
  attr_reader :cube_query, :models

  def initialize cube_query
    @cube_query = cube_query
  end

  # Returns the composed query object for +cube_query+.
  def build_query
    compose_queries optimize! ranked_tables
  end

  # Returns the single connection shared by all models used by the query.
  #
  # Raises RuntimeError when there is no connection (including when
  # +build_query+ has not been called yet) or when the models map to more
  # than one connection.
  def connection
    connections = (models || []).map(&:connection).compact.uniq
    raise "No connection found for query" if connections.empty?
    raise "Tables #{models.map(&:name).join(',')} mapped to multiple connections, can not query" if connections.count > 1
    connections.first
  end

  private

  # Selects, for every MeasureTables item, the entry of the table chosen by
  # Optimizer and returns the same list.
  #
  # Raises RuntimeError when the optimizer picks a table that has no entry
  # for the measure, or when the selection costs more than the initial one.
  def optimize! measure_tables
    all_tables = measure_tables.flat_map { |measure_table| measure_table.tables.keys }.uniq

    # One row per measure, one column per table; nil marks "table can not
    # serve this measure".
    cost_matrix = measure_tables.map do |measure_table|
      all_tables.map do |table|
        entry = measure_table.tables[table]
        entry && entry.cost
      end
    end

    before = total_cost measure_tables
    Optimizer.new(cost_matrix).optimize.each_with_index do |optimal, index|
      measure_table = measure_tables[index]
      # +optimal+ is a column of the cost matrix, i.e. an index into
      # +all_tables+. +selected+ indexes the measure's own +entries+ list,
      # which is shorter whenever some table can not serve the measure, so
      # the column has to be translated before it is stored.
      entry = optimal && measure_table.tables[all_tables[optimal]]
      position = entry && measure_table.entries.index(entry)
      raise "Optimizer selected table #{optimal.inspect} which can not serve measure #{index} for #{cost_matrix}" if position.nil?
      measure_table.selected = position
    end
    after = total_cost measure_tables

    raise "Optimizer made it worse #{before} -> #{after} for #{cost_matrix}" unless after <= before
    measure_tables
  end

  # Cost of the current selection: every used table contributes the highest
  # cost among the measures assigned to it.
  def total_cost measure_tables
    measure_tables.group_by(&:table).values.map { |group|
      group.map { |measure_table| measure_table.entry.cost }.max
    }.sum
  end

  # Builds one MeasureTables per query measure, listing every matching table
  # together with its highest-cardinality applicable index (nil when no
  # index applies).
  #
  # Raises RuntimeError when a measure can not be served by any table.
  def ranked_tables
    tables = cube_query.cube.tables.select { |table| table.matches? cube_query, [] }
    cube_query.measures.map do |measure|
      by = MeasureTables.new measure
      tables.each do |table|
        next unless table.measures? measure
        usable_indexes = table.model.activecube_indexes.select { |index|
          index.indexes? cube_query, [measure]
        }
        # An index declared without cardinality ranks lowest instead of
        # breaking the comparison with nil.
        by.add_table table, usable_indexes.max_by { |index| index.cardinality || 0 }
      end
      raise "Metric #{measure.key} #{measure.definition.name} can not be measured by any of tables #{tables.map(&:name).join(',')}" if by.tables.empty?
      by
    end
  end

  # Builds the query of every selected table (restricted to the measures
  # assigned to it) and joins them left to right. Also records the models
  # used, which +connection+ relies on.
  def compose_queries measure_tables
    composed_query = nil
    @models = []
    measure_tables.group_by(&:table).each_pair do |table, list|
      @models << table.model
      reduced = cube_query.reduced list.map(&:measure)
      table_query = table.query reduced
      composed_query = composed_query ? table.join(cube_query, composed_query, table_query) : table_query
    end
    composed_query
  end

end
76
+ end
@@ -0,0 +1,17 @@
1
module Activecube
  module Processor
    # Describes an index over one or more fields, with an optional cardinality.
    class Index

      # fields      - flat Array of the indexed field names.
      # cardinality - value of the :cardinality option, nil when not given.
      attr_reader :fields, :cardinality

      # name - a field name or an (optionally nested) Array of field names.
      # args - optional; when present, the first element is read as an
      #        options Hash and its :cardinality entry is kept.
      def initialize name, *args
        options = args.first
        @fields = [name].flatten
        @cardinality = options && options[:cardinality]
      end

      # True when every indexed field is among the selector column names the
      # query reports for the given measures.
      def indexes? query, measures
        selected_columns = query.selector_column_names(measures)
        uncovered = fields - selected_columns
        uncovered.empty?
      end

    end
  end
end
@@ -0,0 +1,42 @@
1
module Activecube
  module Processor
    # Candidate tables for one measure, plus which of them is selected.
    class MeasureTables

      # One candidate: a table, the index chosen for it (may be nil) and the
      # cost derived from that index.
      class Entry
        attr_reader :table, :index, :cardinality, :cost

        # cardinality falls back to 0 both when there is no index and when the
        # index was declared without a cardinality, so the cost is always
        # computable. cost is 1 / (1 + cardinality): higher cardinality means
        # lower cost.
        def initialize table, index
          @table = table
          @index = index
          @cardinality = (index && index.cardinality) || 0
          @cost = 1.0 / (1.0 + cardinality)
        end
      end

      # measure - the measure these candidates are for.
      # entries - Entry list in insertion order.
      # tables  - Hash of table => Entry.
      attr_reader :measure, :entries, :tables
      # Position within +entries+ of the selected candidate (0 initially).
      attr_accessor :selected

      def initialize measure
        @measure = measure
        @tables = {}
        @entries = []
        @selected = 0
      end

      # Registers a candidate table with its index and returns the new Entry.
      def add_table table, index
        candidate = Entry.new(table, index)
        entries << candidate
        tables[table] = candidate
      end

      # Table of the selected entry.
      def table
        entry.table
      end

      # The selected entry.
      def entry
        entries[selected]
      end

    end
  end
end
@@ -0,0 +1,123 @@
1
+ module Activecube::Processor
2
+
3
# Chooses one table (column) per metric (row) of a cost matrix.
#
# cost_matrix[m][t] is the cost of serving metric m from table t, or nil when
# the table can not serve the metric. The cost of an assignment is the sum,
# over the tables in use, of the highest cost among the metrics assigned to
# that table, so sharing a table between metrics is rewarded.
class Optimizer

  # Cost substituted for nil cells.
  UNLIM_COST = 9999
  # Upper bound on improvement passes.
  MAX_ITERATIONS = 3

  attr_reader :tables_count, :metrics_count, :cost_matrix

  def initialize cost_matrix
    @cost_matrix = cost_matrix
  end

  # Returns an Array with one table (column) index per metric (row).
  # The result is cached in Rails.cache, keyed by the cost matrix.
  def optimize
    Rails.cache.fetch(cost_matrix, expires_in: 12.hours) do
      @tables_count = cost_matrix.map(&:count).max
      @metrics_count = cost_matrix.count

      tables_count == 1 ? [0] * metrics_count : do_optimize
    end
  end

  private

  def do_optimize
    @tables_by_metrics = []

    # order metrics from the lowest to the highest minimal cost
    sort_metrics

    # initial assignment: cheapest table per metric, preferring tables that
    # are already in use when costs tie
    select_min_cost_by_metric

    # improvement passes over the assignment (at most MAX_ITERATIONS)
    iterates

    @tables_by_metrics
  end

  def sort_metrics
    @metrics_index_sorted = (0...metrics_count).sort_by { |m_i| cost_matrix[m_i].compact.min || UNLIM_COST }
  end

  # Picks, for each metric in sorted order, the table with the lowest cost.
  # Ties are broken explicitly through a composite key rather than through
  # chained sort_by calls, which are not guaranteed to be stable.
  def select_min_cost_by_metric
    @metrics_index_sorted.each do |m_i|
      @tables_by_metrics[m_i] = (0...tables_count).min_by do |c_i|
        [cost_matrix[m_i][c_i] || UNLIM_COST, reuse_preference(c_i)]
      end
    end
  end

  # Tie-break key: 0 for an unused table, minus the cost of the first metric
  # already assigned to it otherwise, so used (and costlier) tables win ties.
  def reuse_preference c_i
    owner = @tables_by_metrics.index(c_i)
    owner ? -(cost_matrix[owner][c_i] || 0) : 0
  end

  # Repeatedly tries to move each metric to a better table. Stops when a pass
  # reproduces an earlier assignment, when a pass would raise the total cost,
  # or after MAX_ITERATIONS passes.
  def iterates
    steps = [@tables_by_metrics]

    MAX_ITERATIONS.times do
      prev_step = steps.last

      # keep the current table unless a better one was found
      step = prev_step.each_with_index.map { |c_i, m_i|
        better_table(prev_step, m_i, c_i) || c_i
      }

      break if steps.include? step
      # moves are judged one metric at a time; never accept a pass whose
      # combined effect is worse than what we already have
      break if assignment_cost(step) > assignment_cost(prev_step)
      steps << step
    end

    @tables_by_metrics = steps.last
  end

  # First table that metric m_i should move to from table c_i, or nil.
  def better_table prev_step, m_i, c_i
    old_cost = cost_matrix[m_i][c_i]
    shares_current_table = prev_step.count(c_i) > 1

    (0...tables_count).detect do |c_n|
      new_cost = cost_matrix[m_i][c_n]
      next if c_i == c_n || new_cost.nil?

      # the current table can not serve the metric at all: any table that can is better
      next true if old_cost.nil?

      target_in_use = prev_step.include?(c_n)

      if shares_current_table
        if target_in_use
          # shared table -> used table: plain cost comparison
          old_cost > new_cost
        else
          # shared table -> unused table: the current table stays in use for
          # the other metrics, so their highest cost is still paid
          old_cost > new_cost + (max_other_cost(prev_step, c_i, m_i) || UNLIM_COST)
        end
      elsif target_in_use
        # own table -> used table: the target is already paid for up to the
        # highest cost of the metrics on it
        old_cost > new_cost - (max_other_cost(prev_step, c_n, m_i) || UNLIM_COST)
      else
        # own table -> unused table: plain cost comparison
        old_cost > new_cost
      end
    end
  end

  # Highest cost among the metrics, other than except_metric, that the given
  # assignment places on +table+; nil when there is none.
  def max_other_cost step, table, except_metric
    step.each_with_index
        .select { |c_i, m_i| c_i == table && m_i != except_metric }
        .map { |_c_i, m_i| cost_matrix[m_i][table] }
        .compact.max
  end

  # Total cost of an assignment: per used table, the highest cost of the
  # metrics assigned to it (nil cells count as UNLIM_COST).
  def assignment_cost step
    step.each_with_index.group_by { |c_i, _m_i| c_i }.values.map { |pairs|
      pairs.map { |c_i, m_i| cost_matrix[m_i][c_i] || UNLIM_COST }.max
    }.sum
  end

end
122
+
123
+ end
@@ -0,0 +1,54 @@
1
+ module Activecube::Processor
2
# Wraps a model and builds queries against its table.
class Table

  attr_reader :model

  def initialize model
    @model = model
  end

  # Table name as reported by the model.
  def name
    model.table_name
  end

  # True when every column the query needs (for the given measures, by
  # default all of the query's measures) is an attribute of the model.
  def matches? query, measures = query.measures
    covers? query.column_names(measures)
  end

  # True when every column required by the measure is an attribute of the model.
  def measures? measure
    covers? measure.required_column_names
  end

  # Builds the query for this table by letting every slice, measure, selector
  # and option of the cube query (in that order) append itself, starting from
  # the model's arel table.
  def query cube_query
    table = model.arel_table
    parts = cube_query.slices + cube_query.measures + cube_query.selectors + cube_query.options

    parts.inject(table) do |built, part|
      part.append_query cube_query, table, built
    end
  end

  # Full-outer-joins two already built queries on the cube query's join
  # fields, then lets the cube query options append themselves to the result.
  def join cube_query, left_query, right_query
    outer_table = model.arel_table.class.new('').project(Arel.star)
    dimension_names = cube_query.join_fields

    joined = outer_table.from(left_query).
        join(right_query, ::Arel::Nodes::FullOuterJoin).
        using(*dimension_names)

    cube_query.options.inject(joined) do |built, option|
      option.append_query cube_query, outer_table, built
    end
  end

  private

  # True when none of the given column names is missing from the model attributes.
  def covers? column_names
    (column_names - model.attribute_types.keys).empty?
  end

end
54
+ end
@@ -0,0 +1,25 @@
1
+ module Activecube::Query
2
# Shared helper for chainable query-building methods.
module ChainAppender
  private

  # Resolves every argument to a definition object and appends the results to
  # +list+; returns self so calls can be chained.
  #
  # args        - each one of:
  #               * a Symbol, looked up in +definitions+ (when given);
  #               * an instance of +def_class+, used as is;
  #               * a Hash of alias => def_class instance; each value is
  #                 aliased through +alias!+ and the return value is used.
  # list        - Array that receives the resolved definitions.
  # def_class   - class the definitions must be instances of.
  # definitions - lookup table for Symbols, may be nil.
  #
  # Raises ArgumentError for an unsupported argument, for a Hash value of the
  # wrong class, and for a Symbol that +definitions+ does not know (which
  # would otherwise put a nil into +list+ and fail much later). Nothing is
  # appended when an error is raised.
  def append *args, list, def_class, definitions
    resolved = args.map do |arg|
      if arg.kind_of?(Symbol) && definitions
        definition = definitions[arg]
        raise ArgumentError, "#{def_class} #{arg} is not defined" if definition.nil?
        definition
      elsif arg.kind_of?(def_class)
        arg
      elsif arg.kind_of? Hash
        arg.map do |alias_name, definition|
          raise ArgumentError, "Unexpected #{definition.class.name} to use for #{def_class} as #{arg}[#{alias_name}]" unless definition.kind_of?(def_class)
          definition.alias! alias_name
        end
      else
        raise ArgumentError, "Unexpected #{arg.class} to use for #{def_class} as #{arg}"
      end
    end

    list.concat resolved.flatten
    self
  end

end
25
+ end
@@ -0,0 +1,112 @@
1
+ module Activecube::Query
2
# Chainable description of a cube query: slices (dimensions), measures,
# selectors and options (ordering / limits). The builder methods modify the
# receiver and return it.
class CubeQuery

  include ChainAppender

  # NOTE(review): +orderings+ is exposed but never assigned in this class.
  attr_reader :cube, :slices, :measures, :selectors, :orderings, :options

  def initialize cube, slices = [], measures = [], selectors = [], options = []
    @cube = cube
    @slices = slices
    @measures = measures
    @selectors = selectors
    @options = options
  end

  # Adds slices: Symbols are resolved through cube.dimensions.
  def slice *args
    append(*args, @slices, Slice, cube.dimensions)
  end

  # Adds measures: Symbols are resolved through cube.metrics.
  def measure *args
    append(*args, @measures, Measure, cube.metrics)
  end

  # Adds selectors: Symbols are resolved through cube.selectors.
  def select *args
    append(*args, @selectors, Selector, cube.selectors)
  end

  # Adds a descending Ordering option per argument.
  def desc *args
    add_options Ordering, args, :desc
  end

  # Adds an ascending Ordering option per argument.
  def asc *args
    add_options Ordering, args, :asc
  end

  # Adds a :skip Limit option per argument.
  def skip *args
    add_options Limit, args, :skip
  end

  # Adds a :take Limit option per argument.
  def take *args
    add_options Limit, args, :take
  end

  # Composes the query, renders it to SQL and executes it on the connection
  # resolved by the composer.
  def query
    composer = Activecube::Processor::Composer.new(self)
    sql = composer.build_query.to_sql
    composer.connection.exec_query(sql)
  end

  # Composed query object, not executed.
  def to_query
    Activecube::Processor::Composer.new(self).build_query
  end

  # SQL text of the composed query.
  def to_sql
    to_query.to_sql
  end

  # Unique column names required by the given measures plus all slices and
  # selectors of the query.
  def column_names measures = self.measures
    (measures + slices + selectors).map(&:required_column_names).flatten.uniq
  end

  # Unique column names required by the selectors of the given measures and
  # by the query's own selectors.
  def selector_column_names measures = self.measures
    all_selectors = (measures.map(&:selectors) + selectors).flatten
    all_selectors.map(&:required_column_names).flatten.uniq
  end

  # Returns a query restricted to +other_measures+ in which selectors shared
  # by all of those measures are hoisted to query level; when every measure
  # still has selectors, an OrSelector over all of them is added as well.
  # Returns self when nothing changes.
  #
  # Options (ordering, limits) are carried over when the reduced query still
  # covers all measures of this query, so that they are not lost on the
  # single-table path. A query for a subset of the measures gets no options.
  def reduced other_measures
    common_selectors = other_measures.map(&:selectors).reduce(:&) || []

    if common_selectors.empty?
      reduced_measures = other_measures
      reduced_selectors = selectors
    else
      reduced_measures = other_measures.map { |m|
        Measure.new m.cube, m.key, m.definition, (m.selectors - common_selectors)
      }
      reduced_selectors = selectors + common_selectors
    end

    unless reduced_measures.any? { |rm| rm.selectors.empty? }
      reduced_selectors += [OrSelector.new(reduced_measures.map(&:selectors).flatten.uniq)]
    end

    return self if (reduced_measures == measures) && (reduced_selectors == selectors)

    reduced_options = other_measures == measures ? options : []
    CubeQuery.new cube, slices, reduced_measures, reduced_selectors, reduced_options
  end

  # Fields used to join per-table queries: the identity of each slice's
  # dimension class, or the slice key when no identity is defined.
  def join_fields
    slices.map { |s| s.dimension_class.identity || s.key }.uniq
  end

  private

  # Appends option_class.new(arg, kind) to +options+ for every arg; returns self.
  def add_options option_class, args, kind
    args.each { |arg| options << option_class.new(arg, kind) }
    self
  end

end
112
+ end