activecube 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.idea/workspace.xml +114 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +60 -0
- data/Rakefile +6 -0
- data/activecube.gemspec +32 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/activecube.rb +6 -0
- data/lib/activecube/active_record_extension.rb +20 -0
- data/lib/activecube/base.rb +6 -0
- data/lib/activecube/clickhouse/metric/count.rb +10 -0
- data/lib/activecube/clickhouse/metric/sum.rb +11 -0
- data/lib/activecube/cube_definition.rb +62 -0
- data/lib/activecube/dimension.rb +5 -0
- data/lib/activecube/dimension_definition_methods.rb +32 -0
- data/lib/activecube/field.rb +12 -0
- data/lib/activecube/metric.rb +5 -0
- data/lib/activecube/processor/composer.rb +76 -0
- data/lib/activecube/processor/index.rb +17 -0
- data/lib/activecube/processor/measure_tables.rb +42 -0
- data/lib/activecube/processor/optimizer.rb +123 -0
- data/lib/activecube/processor/table.rb +54 -0
- data/lib/activecube/query/chain_appender.rb +25 -0
- data/lib/activecube/query/cube_query.rb +112 -0
- data/lib/activecube/query/item.rb +22 -0
- data/lib/activecube/query/limit.rb +17 -0
- data/lib/activecube/query/measure.rb +40 -0
- data/lib/activecube/query/or_selector.rb +20 -0
- data/lib/activecube/query/ordering.rb +17 -0
- data/lib/activecube/query/selector.rb +84 -0
- data/lib/activecube/query/slice.rb +57 -0
- data/lib/activecube/query_methods.rb +11 -0
- data/lib/activecube/selector.rb +11 -0
- data/lib/activecube/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
module Activecube
  # Declaration helpers for definitions backed by table columns.
  #
  # The readers (+column_names+, +identity+, +fields+, +column_name+) are
  # public; the declaration macros (+column+, +field+, +identity_column+) are
  # private. The module relies on +self.name+ for its error messages, so it is
  # presumably meant to be extended by a class — confirm against callers.
  module DimensionDefinitionMethods
    attr_reader :column_names, :identity, :fields

    # Returns the single column declared through +column+.
    #
    # @return [Object] the only declared column name
    # @raise [RuntimeError] when no column or more than one column is declared
    def column_name
      # @column_names stays nil until +column+ is called for the first time;
      # treat that the same as "nothing declared" instead of failing on nil.
      names = column_names || []
      raise "Not defined column for a metric #{self.name}" if names.empty?
      raise "Defined more than one column for a metric #{self.name}" if names.count > 1

      names.first
    end

    private

    # Declares one or more column names (nested arrays are flattened).
    #
    # @raise [DefinitionError] when any of the names was already declared
    # @return [Array] all column names declared so far
    def column(*args)
      array = (@column_names ||= [])
      data = args.flatten
      values = data & array
      raise DefinitionError, "#{values.join(',')} already defined for columns in #{self.name}" unless values.empty?

      array.concat(data)
    end

    # Declares a field; the first argument (as a symbol) is the key under
    # which the new Field is stored, all arguments are passed to Field.new.
    def field(*args)
      (@fields ||= {})[args.first.to_sym] = Field.new(*args)
    end

    # Declares the identity column; only the first argument is used.
    #
    # @raise [RuntimeError] when an identity was already declared
    def identity_column(*args)
      raise "Identity already defined as #{identity} for #{self.name}" if @identity

      @identity = args.first
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Activecube
  module Processor
    # Turns a cube query into one query object: picks a table for every
    # measure, builds one query per selected table and joins them together.
    class Composer
      attr_reader :cube_query, :models

      def initialize(cube_query)
        @cube_query = cube_query
      end

      # @return the query produced by Table#query, joined through Table#join
      #   when more than one table is involved (nil when there are no measures)
      def build_query
        compose_queries(optimize!(ranked_tables))
      end

      # Connection shared by all models used by the last #build_query.
      #
      # @raise [RuntimeError] when there is no connection (including when
      #   #build_query has not been called yet) or more than one connection
      def connection
        # +models+ is only assigned by compose_queries; Array() keeps the
        # "no connection" error instead of a NoMethodError on nil.
        connections = Array(models).map(&:connection).compact.uniq
        raise "No connection found for query" if connections.empty?
        raise "Tables #{models.map(&:name).join(',')} mapped to multiple connections, can not query" if connections.count > 1

        connections.first
      end

      private

      # Asks the Optimizer which table should serve each measure and stores
      # the choice in every MeasureTables#selected.
      #
      # @raise [RuntimeError] when the optimized selection costs more than the
      #   initial one, or when the optimizer picks a table the measure cannot use
      def optimize!(measure_tables)
        all_tables = measure_tables.map(&:tables).map(&:keys).flatten.uniq

        # One row per measure, one column per table of +all_tables+;
        # nil marks a table that can not serve the measure.
        cost_matrix = measure_tables.map do |measure_table|
          all_tables.map do |table|
            entry = measure_table.tables[table]
            entry && entry.cost
          end
        end

        before = total_cost(measure_tables)
        Optimizer.new(cost_matrix).optimize.each_with_index do |optimal, index|
          measure_table = measure_tables[index]
          # +optimal+ is a column of the cost matrix (a position in all_tables),
          # while +selected+ indexes the measure's own +entries+ list, which
          # only holds the tables able to serve that measure. Translate it.
          position = measure_table.entries.index(measure_table.tables[all_tables[optimal]])
          if position.nil?
            raise "Optimizer selected table ##{optimal} which is not applicable for metric ##{index} in #{cost_matrix}"
          end

          measure_table.selected = position
        end
        after = total_cost(measure_tables)

        raise "Optimizer made it worse #{before} -> #{after} for #{cost_matrix}" unless after <= before

        measure_tables
      end

      # Sum over the selected tables of the highest cost among the measures
      # assigned to that table.
      def total_cost(measure_tables)
        measure_tables.group_by(&:table).values.map { |list| list.map { |mt| mt.entry.cost }.max }.sum
      end

      # For every measure collects the tables able to serve it, each paired
      # with the applicable index of highest cardinality (nil when none applies).
      #
      # @raise [RuntimeError] when a measure can not be served by any table
      def ranked_tables
        tables = cube_query.cube.tables.select { |table| table.matches?(cube_query, []) }
        cube_query.measures.map do |measure|
          by = MeasureTables.new(measure)
          tables.each do |table|
            next unless table.measures?(measure)

            applicable = table.model.activecube_indexes.select { |index| index.indexes?(cube_query, [measure]) }
            # An index may be declared without a cardinality; rank it lowest
            # instead of failing on a nil comparison.
            max_cardinality_index = applicable.max_by { |index| index.cardinality || 0 }
            by.add_table(table, max_cardinality_index)
          end
          raise "Metric #{measure.key} #{measure.definition.name} can not be measured by any of tables #{tables.map(&:name).join(',')}" if by.tables.empty?

          by
        end
      end

      # Builds one query per selected table (restricted to the measures
      # assigned to it) and joins them; records the models used in @models.
      def compose_queries(measure_tables)
        composed_query = nil
        @models = []
        measure_tables.group_by(&:table).each_pair do |table, list|
          @models << table.model
          reduced = cube_query.reduced(list.map(&:measure))
          table_query = table.query(reduced)
          composed_query = composed_query ? table.join(cube_query, composed_query, table_query) : table_query
        end
        composed_query
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Activecube
  module Processor
    # Describes an index as a list of fields plus an optional cardinality.
    class Index
      attr_reader :fields, :cardinality

      # @param name a field name or an (optionally nested) array of field names
      # @param args optional; when given, the first element is read as an
      #   options collection and its +:cardinality+ entry is kept
      def initialize(name, *args)
        options = args.first
        @fields = [name].flatten
        @cardinality = options && options[:cardinality]
      end

      # True when every field of this index appears among the selector
      # column names the query reports for the given measures.
      def indexes?(query, measures)
        uncovered = fields - query.selector_column_names(measures)
        uncovered.empty?
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Activecube
  module Processor
    # Candidate tables for one measure, plus which of them is selected.
    class MeasureTables
      # One candidate: a table, the index chosen for it (may be nil) and the
      # cost derived from that index's cardinality.
      class Entry
        attr_reader :table, :index, :cardinality, :cost

        # @param table the candidate table
        # @param index the index to use, or nil when none applies
        def initialize(table, index)
          @table = table
          @index = index
          # Both "no index" and "index without a declared cardinality" count
          # as cardinality 0; a nil here would break the cost arithmetic below.
          @cardinality = (index && index.cardinality) || 0
          # Higher cardinality gives lower cost; cardinality 0 gives cost 1.0.
          @cost = 1.0 / (1.0 + cardinality)
        end
      end

      attr_reader :measure, :entries, :tables
      attr_accessor :selected

      # Starts with no candidates and the first entry (position 0) selected.
      def initialize(measure)
        @measure = measure
        @tables = {}
        @entries = []
        @selected = 0
      end

      # Registers a candidate table; the entry is appended to +entries+ and
      # also stored in +tables+ under the table itself.
      #
      # @return [Entry] the entry created for the table
      def add_table(table, index)
        e = Entry.new(table, index)
        entries << e
        tables[table] = e
      end

      # Table of the currently selected entry.
      def table
        entry.table
      end

      # Entry at position +selected+ of +entries+.
      def entry
        entries[selected]
      end
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Activecube
  module Processor
    # Chooses, for every metric, the table (a column of the cost matrix) that
    # should serve it.
    #
    # +cost_matrix+ has one row per metric and one column per table; an entry
    # is the cost of serving that metric from that table, or nil when the
    # table can not serve it. The cost of an assignment is the sum, over the
    # tables it uses, of the highest cost among the metrics placed on that
    # table (a nil cost counts as UNLIM_COST).
    class Optimizer
      UNLIM_COST = 9999
      MAX_ITERATIONS = 3

      attr_reader :tables_count, :metrics_count, :cost_matrix

      def initialize(cost_matrix)
        @cost_matrix = cost_matrix
      end

      # @return [Array<Integer>] table (column) index for every metric (row).
      #   The result is fetched through Rails.cache keyed by the matrix,
      #   with a 12 hour expiry.
      def optimize
        Rails.cache.fetch(cost_matrix, expires_in: 12.hours) do
          @tables_count = cost_matrix.map(&:count).max
          @metrics_count = cost_matrix.count

          tables_count == 1 ? [0] * metrics_count : do_optimize
        end
      end

      private

      def do_optimize
        @tables_by_metrics = []

        # sort metrics from low min cost to higher min costs ( by all applicable tables )
        sort_metrics

        # fill initial @tables_by_metrics by selecting tables with minimum cost for metrics.
        # If several tables share this minimum cost, prefer an already selected table
        select_min_cost_by_metric

        # improve @tables_by_metrics by moving single metrics ( max MAX_ITERATIONS passes )
        iterates

        @tables_by_metrics
      end

      def sort_metrics
        # The metric index is part of the key so ties are ordered deterministically.
        @metrics_index_sorted = (0...metrics_count).sort_by do |m_i|
          [cost_matrix[m_i].compact.min || UNLIM_COST, m_i]
        end
      end

      # Greedy start: every metric takes its cheapest table.
      def select_min_cost_by_metric
        @metrics_index_sorted.each do |m_i|
          @tables_by_metrics[m_i] = (0...tables_count).min_by do |c_i|
            # Tie-break between equally cheap tables: a table already taken by
            # another metric wins, the more so the higher the cost of the first
            # metric that took it. A single composite key is used because two
            # chained sort_by calls are not guaranteed to be stable.
            owner = @tables_by_metrics.index(c_i)
            reuse_rank = owner ? -(cost_matrix[owner][c_i] || 0) : 0
            [cost_matrix[m_i][c_i] || UNLIM_COST, reuse_rank, c_i]
          end
        end
      end

      # Local improvement: for each metric look for a table suggested by the
      # move rules and keep the move only when it really lowers the total
      # cost, so the result is never worse than the greedy start.
      def iterates
        current = @tables_by_metrics
        current_cost = assignment_cost(current)

        MAX_ITERATIONS.times do
          changed = false

          metrics_count.times do |m_i|
            new_c_i = better_table_for(m_i, current)
            next unless new_c_i

            candidate = current.dup
            candidate[m_i] = new_c_i
            candidate_cost = assignment_cost(candidate)
            next unless candidate_cost < current_cost

            current = candidate
            current_cost = candidate_cost
            changed = true
          end

          break unless changed
        end

        @tables_by_metrics = current
      end

      # First table the move rules consider better for metric +m_i+ than the
      # one it currently has in +assignment+, or nil when there is none.
      def better_table_for(m_i, assignment)
        c_i = assignment[m_i]
        old_cost = cost_matrix[m_i][c_i]
        shared = assignment.count(c_i) > 1

        (0...tables_count).detect do |c_n|
          new_cost = cost_matrix[m_i][c_n]
          next if c_i == c_n || new_cost.nil?
          # the current table can not serve the metric at all: any applicable table is better
          next true if old_cost.nil?

          target_used = assignment.include?(c_n)
          if shared && !target_used
            # shared table to unused table if
            # cost now > new cost + max other cost in table now
            old_cost > new_cost + (max_other_cost(assignment, c_i, m_i) || UNLIM_COST)
          elsif !shared && target_used
            # exclusively used table to used table if
            # cost now > new cost - max other cost in new table
            old_cost > new_cost - (max_other_cost(assignment, c_n, m_i) || UNLIM_COST)
          else
            # shared to used, or exclusive to unused: cost now > new cost
            old_cost > new_cost
          end
        end
      end

      # Highest known cost among the metrics other than +m_i+ that are placed
      # on +table+; nil when there is no such cost.
      def max_other_cost(assignment, table, m_i)
        others = assignment.each_index.select { |i| i != m_i && assignment[i] == table }
        others.map { |i| cost_matrix[i][table] }.compact.max
      end

      # Total cost of an assignment: sum over used tables of the highest cost
      # of the metrics placed there.
      def assignment_cost(assignment)
        per_table = Hash.new { |hash, key| hash[key] = [] }
        assignment.each_with_index do |c_i, m_i|
          per_table[c_i] << (cost_matrix[m_i][c_i] || UNLIM_COST)
        end
        per_table.values.map(&:max).sum
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Activecube::Processor
  # Wraps a model and builds the queries that run against its table.
  class Table
    attr_reader :model

    def initialize(model)
      @model = model
    end

    # Table name as reported by the model.
    def name
      model.table_name
    end

    # True when every column the query needs for +measures+ is among the
    # keys of the model's attribute types.
    def matches?(query, measures = query.measures)
      missing = query.column_names(measures) - model.attribute_types.keys
      missing.empty?
    end

    # True when every column required by +measure+ is among the keys of the
    # model's attribute types.
    def measures?(measure)
      missing = measure.required_column_names - model.attribute_types.keys
      missing.empty?
    end

    # Starts from the model's arel table and lets every slice, measure,
    # selector and option of the cube query (in that order) extend the query.
    def query(cube_query)
      table = model.arel_table
      parts = cube_query.slices + cube_query.measures + cube_query.selectors + cube_query.options

      parts.inject(table) do |built, part|
        part.append_query(cube_query, table, built)
      end
    end

    # Full-outer-joins +left_query+ with +right_query+ on the cube query's
    # join fields, then lets the cube query's options extend the result.
    def join(cube_query, left_query, right_query)
      outer_table = model.arel_table.class.new('').project(Arel.star)
      dimension_names = cube_query.join_fields

      joined = outer_table
               .from(left_query)
               .join(right_query, ::Arel::Nodes::FullOuterJoin)
               .using(*dimension_names)

      cube_query.options.inject(joined) do |built, option|
        option.append_query(cube_query, outer_table, built)
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Activecube
  module Query
    # Shared helper for chainable query builders.
    module ChainAppender
      private

      # Resolves every leading argument to definition object(s) and appends
      # them to +list+.
      #
      # Accepted argument forms:
      # * Symbol (only when +definitions+ is given) - looked up in +definitions+
      # * instance of +def_class+ - used as is
      # * Hash of alias => instance of +def_class+ - each value is aliased
      #   through +alias!+ and the result of that call is appended
      #
      # Nothing is appended when any argument is rejected.
      #
      # @param list [Array] collection the resolved objects are appended to
      # @param def_class [Class] class the arguments must be instances of
      # @param definitions [#[], nil] lookup used for Symbol arguments
      # @return [self] to allow chaining
      # @raise [ArgumentError] for an unknown Symbol or an argument of an
      #   unexpected type
      def append(*args, list, def_class, definitions)
        resolved = args.map do |arg|
          if arg.kind_of?(Symbol) && definitions
            # A missing definition used to put nil into the chain, which only
            # failed later and far from the cause; reject it right here.
            definitions[arg] || raise(ArgumentError, "#{arg} is not defined to use for #{def_class}")
          elsif arg.kind_of?(def_class)
            arg
          elsif arg.kind_of?(Hash)
            arg.map do |key, value|
              raise ArgumentError, "Unexpected #{value.class.name} to use for #{def_class} as #{arg}[#{key}]" unless value.kind_of?(def_class)

              value.alias!(key)
            end
          else
            raise ArgumentError, "Unexpected #{arg.class} to use for #{def_class} as #{arg}"
          end
        end

        list.concat(resolved.flatten)
        self
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Activecube::Query
  # Chainable description of a query against a cube: slices, measures,
  # selectors and options (orderings and limits).
  class CubeQuery
    include ChainAppender

    # NOTE(review): +orderings+ is never assigned in this class, so the
    # reader returns nil here — confirm whether it is still needed.
    attr_reader :cube, :slices, :measures, :selectors, :orderings, :options

    def initialize(cube, slices = [], measures = [], selectors = [], options = [])
      @cube = cube
      @slices = slices
      @measures = measures
      @selectors = selectors
      @options = options
    end

    # Adds slices, resolving symbols through the cube's dimensions.
    def slice(*args)
      append(*args, @slices, Slice, cube.dimensions)
    end

    # Adds measures, resolving symbols through the cube's metrics.
    def measure(*args)
      append(*args, @measures, Measure, cube.metrics)
    end

    # Adds selectors, resolving symbols through the cube's selectors.
    def select(*args)
      append(*args, @selectors, Selector, cube.selectors)
    end

    # Adds a descending ordering option for every argument.
    def desc(*args)
      args.each { |arg| options << Ordering.new(arg, :desc) }
      self
    end

    # Adds an ascending ordering option for every argument.
    def asc(*args)
      args.each { |arg| options << Ordering.new(arg, :asc) }
      self
    end

    # Adds a +:skip+ limit option for every argument.
    def skip(*args)
      args.each { |arg| options << Limit.new(arg, :skip) }
      self
    end

    # Adds a +:take+ limit option for every argument.
    def take(*args)
      args.each { |arg| options << Limit.new(arg, :take) }
      self
    end

    # Composes the query, renders it to SQL and executes it on the
    # composer's connection.
    def query
      composer = Activecube::Processor::Composer.new(self)
      sql = composer.build_query.to_sql
      composer.connection.exec_query(sql)
    end

    # The composed query object, not executed.
    def to_query
      Activecube::Processor::Composer.new(self).build_query
    end

    # SQL text of the composed query.
    def to_sql
      to_query.to_sql
    end

    # Unique column names required by the given measures plus this query's
    # slices and selectors.
    def column_names(measures = self.measures)
      parts = measures + slices + selectors
      parts.map(&:required_column_names).flatten.uniq
    end

    # Unique column names required by the selectors of the given measures
    # plus this query's own selectors.
    def selector_column_names(measures = self.measures)
      all_selectors = (measures.map(&:selectors) + selectors).flatten
      all_selectors.map(&:required_column_names).flatten.uniq
    end

    # Query restricted to +other_measures+. Selectors shared by all of those
    # measures are moved to the query level; when every remaining measure
    # still has selectors of its own, their union is added as an OrSelector.
    # Returns self when nothing changes.
    #
    # NOTE(review): the new query is built without this query's +options+
    # (orderings / limits) — confirm that this is intended.
    def reduced(other_measures)
      common_selectors = other_measures.map(&:selectors).inject { |shared, own| shared & own } || []

      if common_selectors.empty?
        reduced_measures = other_measures
        reduced_selectors = selectors
      else
        reduced_measures = other_measures.map do |m|
          Measure.new(m.cube, m.key, m.definition, m.selectors - common_selectors)
        end
        reduced_selectors = selectors + common_selectors
      end

      if reduced_measures.none? { |rm| rm.selectors.empty? }
        reduced_selectors += [OrSelector.new(reduced_measures.map(&:selectors).flatten.uniq)]
      end

      return self if reduced_measures == measures && reduced_selectors == selectors

      CubeQuery.new(cube, slices, reduced_measures, reduced_selectors)
    end

    # Unique names used to join per-table queries: each slice's dimension
    # identity when defined, otherwise the slice key.
    def join_fields
      slices.map { |s| s.dimension_class.identity || s.key }.uniq
    end
  end
end
|