activecube 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.idea/workspace.xml +114 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +60 -0
- data/Rakefile +6 -0
- data/activecube.gemspec +32 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/activecube.rb +6 -0
- data/lib/activecube/active_record_extension.rb +20 -0
- data/lib/activecube/base.rb +6 -0
- data/lib/activecube/clickhouse/metric/count.rb +10 -0
- data/lib/activecube/clickhouse/metric/sum.rb +11 -0
- data/lib/activecube/cube_definition.rb +62 -0
- data/lib/activecube/dimension.rb +5 -0
- data/lib/activecube/dimension_definition_methods.rb +32 -0
- data/lib/activecube/field.rb +12 -0
- data/lib/activecube/metric.rb +5 -0
- data/lib/activecube/processor/composer.rb +76 -0
- data/lib/activecube/processor/index.rb +17 -0
- data/lib/activecube/processor/measure_tables.rb +42 -0
- data/lib/activecube/processor/optimizer.rb +123 -0
- data/lib/activecube/processor/table.rb +54 -0
- data/lib/activecube/query/chain_appender.rb +25 -0
- data/lib/activecube/query/cube_query.rb +112 -0
- data/lib/activecube/query/item.rb +22 -0
- data/lib/activecube/query/limit.rb +17 -0
- data/lib/activecube/query/measure.rb +40 -0
- data/lib/activecube/query/or_selector.rb +20 -0
- data/lib/activecube/query/ordering.rb +17 -0
- data/lib/activecube/query/selector.rb +84 -0
- data/lib/activecube/query/slice.rb +57 -0
- data/lib/activecube/query_methods.rb +11 -0
- data/lib/activecube/selector.rb +11 -0
- data/lib/activecube/version.rb +3 -0
- metadata +140 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
module Activecube
  # Declaration helpers for definitions that are backed by table columns.
  #
  # All state is kept in instance variables of the receiver and error messages
  # use the receiver's `name`, so the module is presumably mixed in with
  # `extend` on definition classes -- confirm against the classes using it.
  module DimensionDefinitionMethods

    # column_names - Array filled by `column`; nil until `column` is first called.
    # identity     - value given to `identity_column`; nil when never set.
    # fields       - Hash of field name (Symbol) => Field; nil until `field` is first called.
    attr_reader :column_names, :identity, :fields

    # Returns the only column declared for this definition.
    #
    # Raises RuntimeError when no column, or more than one column, is declared.
    def column_name
      # `column_names` is nil when `column` was never called. Treat that as
      # "no columns" so the descriptive error is raised instead of a
      # NoMethodError on nil.
      names = column_names || []
      raise "Not defined column for a metric #{self.name}" if names.empty?
      raise "Defined more than one column for a metric #{self.name}" if names.count > 1
      names.first
    end

    private

    # Declares one or more columns; arguments may be nested arrays.
    #
    # Raises DefinitionError (resolved from the enclosing namespace, not
    # defined in this module) when any of the columns was declared before.
    # Returns the updated list of column names.
    def column *args
      declared = (@column_names ||= [])
      added = [*args].flatten
      duplicates = added & declared
      raise DefinitionError, "#{duplicates.join(',')} already defined for columns in #{self.name}" unless duplicates.empty?
      declared.concat added
    end

    # Declares a field. All arguments are forwarded to Field.new and the
    # field is stored under the symbolized first argument.
    def field *args
      (@fields ||= {})[args.first.to_sym] = Field.new(*args)
    end

    # Declares the identity column (first argument).
    #
    # Raises RuntimeError when an identity was already declared.
    def identity_column *args
      raise "Identity already defined as #{identity} for #{self.name}" if @identity
      @identity = args.first
    end

  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Activecube
  module Processor
    # Turns a cube query into one composed query: finds the tables able to
    # serve each measure, lets the Optimizer pick one table per measure and
    # joins the per-table queries.
    class Composer

      # models is filled by build_query (nil before it was called).
      attr_reader :cube_query, :models

      def initialize cube_query
        @cube_query = cube_query
      end

      # Returns the composed query object for the cube query.
      def build_query
        compose_queries optimize! ranked_tables
      end

      # Returns the single connection shared by all models used in the query.
      # Must be called after build_query, which collects the models.
      #
      # Raises RuntimeError when there is no connection or more than one.
      def connection
        connections = models.map(&:connection).compact.uniq
        raise "No connection found for query" if connections.empty?
        raise "Tables #{models.map(&:name).join(',')} mapped to multiple connections, can not query" if connections.count>1
        connections.first
      end

      private

      # Selects, for every element of measure_tables, the entry chosen by the
      # Optimizer. Returns measure_tables.
      #
      # Raises RuntimeError when the total cost after optimization is higher
      # than before.
      def optimize! measure_tables

        all_tables = measure_tables.map(&:tables).map(&:keys).flatten.uniq

        # One row per measure, one column per table of all_tables;
        # nil where the table has no entry for the measure.
        cost_matrix = measure_tables.collect do |measure_table|
          all_tables.collect do |table|
            entry = measure_table.tables[table]
            entry && entry.cost
          end
        end

        before = total_cost measure_tables
        Optimizer.new(cost_matrix).optimize.each_with_index do |optimal, index|
          measure_table = measure_tables[index]
          # `optimal` is a column of the cost matrix, i.e. an index into
          # all_tables, while `selected` is an index into the measure's own
          # entries. The two differ as soon as a measure is not served by
          # every table, so translate through the table itself.
          chosen = measure_table.tables[all_tables[optimal]]
          measure_table.selected = measure_table.entries.index(chosen) if chosen
        end
        after = total_cost measure_tables

        raise "Optimizer made it worth #{before} -> #{after} for #{cost_matrix}" unless after <= before
        measure_tables

      end

      # Sum, over the selected tables, of the highest cost among the entries
      # selected for that table.
      def total_cost measure_tables
        measure_tables.group_by(&:table).collect{|_table, group| group.map(&:entry).map(&:cost).max }.sum
      end

      # Builds one MeasureTables per measure of the query, listing every
      # matching table that can measure it together with the applicable index
      # of highest cardinality (nil when no index applies).
      #
      # Raises RuntimeError when a measure can not be served by any table.
      def ranked_tables
        tables = cube_query.cube.tables.select{|table| table.matches? cube_query, []}
        cube_query.measures.collect do |measure|
          by = MeasureTables.new measure
          tables.each do |table|
            next unless table.measures? measure
            applicable = table.model.activecube_indexes.select{|index|
              index.indexes? cube_query, [measure]
            }
            # An index declared without cardinality ranks as 0 instead of
            # breaking the comparison.
            max_cardinality_index = applicable.max_by{|index| index.cardinality || 0 }
            by.add_table table, max_cardinality_index
          end
          raise "Metric #{measure.key} #{measure.definition.name} can not be measured by any of tables #{tables.map(&:name).join(',')}" if by.tables.empty?
          by
        end
      end

      # Builds one query per selected table and joins them in turn.
      # Records the models of the tables used in @models.
      def compose_queries measure_tables
        composed_query = nil
        @models = []
        measure_tables.group_by(&:table).each_pair do |table, list|
          @models << table.model
          reduced = cube_query.reduced list.map(&:measure)
          table_query = table.query reduced
          composed_query = composed_query ? table.join(cube_query, composed_query, table_query) : table_query
        end
        composed_query
      end

    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Activecube
  module Processor
    # An index over one or more fields, with an optional cardinality.
    class Index

      attr_reader :fields, :cardinality

      # name - a field or an (optionally nested) array of fields.
      # args - the first extra argument, when given, is read with
      #        [:cardinality]; cardinality stays nil otherwise.
      def initialize(name, *args)
        options = args.first
        @fields = [name].flatten
        @cardinality = options && options[:cardinality]
      end

      # True when every field of the index is among the column names
      # returned by query.selector_column_names(measures).
      def indexes?(query, measures)
        selector_columns = query.selector_column_names(measures)
        uncovered = fields - selector_columns
        uncovered.empty?
      end

    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Activecube
  module Processor
    # The candidate tables for one measure, plus which of them is selected.
    class MeasureTables

      # One candidate: a table and the index (possibly nil) used with it.
      class Entry
        attr_reader :table, :index, :cardinality, :cost

        # cardinality is taken from the index; a missing index, or an index
        # without a declared cardinality, counts as 0 (the original raised
        # TypeError in the latter case).
        # cost is 1 / (1 + cardinality): the higher the cardinality, the
        # lower the cost.
        def initialize table, index
          @table = table
          @index = index
          @cardinality = (index && index.cardinality) || 0
          @cost = 1.0 / (1.0 + cardinality)
        end
      end

      # entries - candidates in the order they were added.
      # tables  - the same candidates keyed by table.
      attr_reader :measure, :entries, :tables
      # Position of the selected candidate in entries; 0 initially.
      attr_accessor :selected

      def initialize measure
        @measure = measure
        @tables = {}
        @entries = []
        @selected = 0
      end

      # Registers a candidate table with its index and returns the new Entry.
      def add_table table, index
        e = Entry.new(table, index)
        entries << e
        tables[table] = e
      end

      # Table of the selected entry.
      def table
        entry.table
      end

      # The selected entry.
      def entry
        entries[selected]
      end

    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Activecube
  module Processor

    # Chooses, for every metric, the table it should be measured from.
    #
    # cost_matrix[m][t] is the cost of measuring metric m from table t, or
    # nil when that combination is not possible. The total cost of an
    # assignment is the sum, over every table in use, of the highest cost
    # among the metrics assigned to that table.
    class Optimizer

      # Cost used in place of nil (impossible) matrix cells.
      UNLIM_COST = 9999
      # Upper bound on refinement passes.
      MAX_ITERATIONS = 3

      attr_reader :tables_count, :metrics_count, :cost_matrix

      def initialize cost_matrix
        @cost_matrix = cost_matrix
      end

      # Returns an array with one table index (matrix column) per metric
      # (matrix row). The result is kept in Rails.cache for 12 hours, keyed
      # by the cost matrix.
      def optimize

        Rails.cache.fetch(cost_matrix, expires_in: 12.hours) do

          @tables_count = cost_matrix.map(&:count).max
          @metrics_count = cost_matrix.count

          tables_count==1 ? [0]*metrics_count : do_optimize

        end

      end

      private

      def do_optimize
        @tables_by_metrics = []

        # sort metrics from low min cost to higher min costs ( by all applicable tables )
        sort_metrics

        # fill initial @tables_by_metrics by selecting tables with minimum cost for metrics.
        # If there are more than one table with this minimum cost, then prefer an already selected table
        select_min_cost_by_metric

        # make iterations over @tables_by_metrics ( max MAX_ITERATIONS)
        iterates

        @tables_by_metrics
      end

      def sort_metrics
        @metrics_index_sorted = (0...metrics_count).sort_by{|m_i| cost_matrix[m_i].compact.min || UNLIM_COST }
      end

      def select_min_cost_by_metric

        @metrics_index_sorted.each do |m_i|

          candidates = (0...tables_count).map do |c_i|
            # Negative cost of the first metric already placed on this table:
            # among equally cheap tables the one already in use wins.
            reuse_rank = @tables_by_metrics.include?(c_i) ? -cost_matrix[@tables_by_metrics.index(c_i)][c_i] : 0
            [c_i, cost_matrix[m_i][c_i] || UNLIM_COST, reuse_rank]
          end

          # Single comparison on [cost, reuse_rank]; chaining two sort_by
          # calls relied on a stable sort, which Ruby does not guarantee.
          @tables_by_metrics[m_i] = candidates.min_by{|_c_i, cost, reuse_rank| [cost, reuse_rank] }.first

        end
      end

      # Refines @tables_by_metrics for at most MAX_ITERATIONS passes and
      # stops early when a pass reproduces an assignment already seen.
      def iterates

        steps = [@tables_by_metrics]

        MAX_ITERATIONS.times do
          step = improve steps.last
          break if steps.include? step
          steps << step
        end

        @tables_by_metrics = steps.last

      end

      # One refinement pass: each metric in turn is moved to another possible
      # table when, and only when, that lowers the total cost. Every accepted
      # move is visible to the metrics processed after it.
      #
      # The original pass never changed anything (`step << c_i || new_c_i`
      # always appended c_i) and its comparisons mixed table indexes with
      # costs. Evaluating the total cost directly covers all of its cases and
      # can never make the assignment worse.
      def improve assignment
        step = assignment.dup

        assignment.each_index do |m_i|
          best_cost = assignment_cost step

          (0...tables_count).each do |c_n|
            next if c_n == step[m_i] || cost_matrix[m_i][c_n].nil?

            candidate = step.dup
            candidate[m_i] = c_n
            candidate_cost = assignment_cost candidate
            next unless candidate_cost < best_cost

            step = candidate
            best_cost = candidate_cost
          end
        end

        step
      end

      # Total cost of an assignment: for every table in use the highest cost
      # among its metrics, summed over the tables.
      def assignment_cost assignment
        max_by_table = {}
        assignment.each_with_index do |c_i, m_i|
          cost = cost_matrix[m_i][c_i] || UNLIM_COST
          max_by_table[c_i] = cost if max_by_table[c_i].nil? || cost > max_by_table[c_i]
        end
        max_by_table.values.sum
      end

    end

  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Activecube::Processor
  # A model class seen as a table that cube queries can be run against.
  class Table

    attr_reader :model

    def initialize(model)
      @model = model
    end

    # Table name of the wrapped model.
    def name
      model.table_name
    end

    # True when every column the query needs (for the given measures, by
    # default all measures of the query) is an attribute of the model.
    def matches?(query, measures = query.measures)
      required = query.column_names(measures)
      missing = required - model.attribute_types.keys
      missing.empty?
    end

    # True when every column the measure needs is an attribute of the model.
    def measures?(measure)
      missing = measure.required_column_names - model.attribute_types.keys
      missing.empty?
    end

    # Builds the query for this table: starting from the model's arel table,
    # each slice, measure, selector and option of the cube query appends
    # itself in that order.
    def query(cube_query)
      table = model.arel_table
      parts = cube_query.slices + cube_query.measures + cube_query.selectors + cube_query.options

      parts.inject(table) do |built, part|
        part.append_query cube_query, table, built
      end
    end

    # Full outer join of two queries on the cube query's join fields, with
    # the cube query's options appended to the joined query.
    def join(cube_query, left_query, right_query)
      outer_table = model.arel_table.class.new('').project(Arel.star)
      dimension_names = cube_query.join_fields

      joined = outer_table.from(left_query)
      joined = joined.join(right_query, ::Arel::Nodes::FullOuterJoin)
      joined = joined.using(*dimension_names)

      cube_query.options.inject(joined) do |built, option|
        option.append_query cube_query, outer_table, built
      end
    end

  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Activecube
  module Query
    # Shared implementation behind chainable query builder methods.
    module ChainAppender
      private

      # Appends the given arguments to `list` and returns self for chaining.
      #
      # args        - each argument is one of:
      #               * a Symbol (only when `definitions` is given): replaced
      #                 by definitions[symbol];
      #               * an instance of def_class: appended as is;
      #               * a Hash of alias => def_class instance: every value is
      #                 appended as the result of value.alias!(alias).
      # list        - Array that is extended in place.
      # def_class   - class accepted for direct arguments and Hash values.
      # definitions - lookup for Symbol arguments, or nil.
      #
      # Raises ArgumentError for any other argument, and for a Symbol missing
      # from `definitions` (it used to be appended as nil and fail later, far
      # from the call that caused it). Nothing is appended when an argument
      # is rejected.
      def append *args, list, def_class, definitions
        list.concat args.map{|arg|
          if arg.kind_of?(Symbol) && definitions
            definitions[arg] || raise(ArgumentError, "#{arg} is not defined for #{def_class}")
          elsif arg.kind_of?(def_class)
            arg
          elsif arg.kind_of? Hash
            arg.collect{|key, value|
              raise ArgumentError, "Unexpected #{value.class.name} to use for #{def_class} as #{arg}[#{key}]" unless value.kind_of?(def_class)
              value.alias! key
            }
          else
            raise ArgumentError, "Unexpected #{arg.class} to use for #{def_class} as #{arg}"
          end
        }.flatten
        self
      end

    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Activecube::Query
  # Chainable description of a query against a cube: slices, measures,
  # selectors and options (orderings and limits).
  class CubeQuery

    include ChainAppender

    # NOTE(review): `orderings` is never assigned in this class (orderings
    # are stored in `options`), so the reader returns nil.
    attr_reader :cube, :slices, :measures, :selectors, :orderings, :options

    def initialize cube, slices = [], measures = [], selectors = [], options = []
      @cube = cube
      @slices = slices
      @measures = measures
      @selectors = selectors
      @options = options
    end

    # Adds slices: Slice instances, Symbols looked up in cube.dimensions, or
    # a Hash of alias => Slice. Returns self.
    def slice *args
      append(*args, @slices, Slice, cube.dimensions)
    end

    # Adds measures: Measure instances, Symbols looked up in cube.metrics,
    # or a Hash of alias => Measure. Returns self.
    def measure *args
      append(*args, @measures, Measure, cube.metrics)
    end

    # Adds selectors: Selector instances, Symbols looked up in
    # cube.selectors, or a Hash of alias => Selector. Returns self.
    def select *args
      append(*args, @selectors, Selector, cube.selectors)
    end

    # Adds a descending ordering option per argument. Returns self.
    def desc *args
      args.each{|arg|
        options << Ordering.new(arg, :desc)
      }
      self
    end

    # Adds an ascending ordering option per argument. Returns self.
    def asc *args
      args.each{|arg|
        options << Ordering.new(arg, :asc)
      }
      self
    end

    # Adds a :skip limit option per argument. Returns self.
    def skip *args
      args.each{|arg|
        options << Limit.new(arg, :skip)
      }
      self
    end

    # Adds a :take limit option per argument. Returns self.
    def take *args
      args.each{|arg|
        options << Limit.new(arg, :take)
      }
      self
    end

    # Composes the query, runs its SQL on the composer's connection and
    # returns the result of exec_query.
    def query
      composer = Activecube::Processor::Composer.new(self)
      sql = composer.build_query.to_sql
      composer.connection.exec_query(sql)
    end

    # Returns the composed query object without running it.
    def to_query
      Activecube::Processor::Composer.new(self).build_query
    end

    # Returns the SQL of the composed query.
    def to_sql
      to_query.to_sql
    end

    # Distinct column names required by the given measures (all measures by
    # default) together with the slices and selectors of the query.
    def column_names measures = self.measures
      (measures + slices + selectors).map(&:required_column_names).flatten.uniq
    end

    # Distinct column names required by the selectors of the given measures
    # (all measures by default) and by the selectors of the query.
    def selector_column_names measures = self.measures
      (measures.map(&:selectors) + selectors).flatten.map(&:required_column_names).flatten.uniq
    end

    # Returns a query restricted to other_measures in which selectors shared
    # by all of those measures are moved to the query level. When every
    # measure still has selectors of its own, an OrSelector over them is
    # added as well. Returns self when nothing changes.
    def reduced other_measures

      # Selectors present in every one of other_measures.
      common_selectors = []
      other_measures.each_with_index do |m,i|
        if i==0
          common_selectors += m.selectors
        else
          common_selectors &= m.selectors
        end
      end

      if common_selectors.empty?
        reduced_measures = other_measures
        reduced_selectors = self.selectors
      else
        reduced_measures = other_measures.collect{|m|
          Measure.new m.cube, m.key, m.definition, (m.selectors - common_selectors)
        }
        reduced_selectors = self.selectors + common_selectors
      end

      unless reduced_measures.detect{|rm| rm.selectors.empty? }
        reduced_selectors += [OrSelector.new(reduced_measures.map(&:selectors).flatten.uniq)]
      end

      return self if (reduced_measures == self.measures) && (reduced_selectors == self.selectors)

      # Options are carried over: without them the reduced query lost its
      # orderings and limits, although the `return self` path above keeps them.
      CubeQuery.new cube, slices, reduced_measures, reduced_selectors, options
    end

    # Distinct names used to join per-table queries: for each slice the
    # identity of its dimension class, or the slice key when there is none.
    def join_fields
      slices.map{|s| s.dimension_class.identity || s.key }.uniq
    end

  end
end
|