mdquery 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +53 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +78 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/mdquery.rb +13 -0
- data/lib/mdquery/dataset.rb +283 -0
- data/lib/mdquery/dsl.rb +176 -0
- data/lib/mdquery/model.rb +270 -0
- data/lib/mdquery/util.rb +21 -0
- data/mdquery.gemspec +78 -0
- data/spec/mdquery/dataset_spec.rb +232 -0
- data/spec/mdquery/dsl_spec.rb +144 -0
- data/spec/mdquery/model_spec.rb +473 -0
- data/spec/mdquery/util_spec.rb +25 -0
- data/spec/mdquery_spec.rb +35 -0
- data/spec/spec_helper.rb +11 -0
- metadata +149 -0
data/lib/mdquery/dsl.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
require 'mdquery/model'
|
2
|
+
|
3
|
+
module MDQuery
|
4
|
+
module DSL
|
5
|
+
|
6
|
+
# DSL for describing a DimensionSegment. The block handed to the constructor
# is instance_eval'd against the new instance, so the public methods below
# are the calls available inside a +segment+ block
class DimensionSegmentDSL

  # fix the Dimension value for this segment. exclusive of +extract_dimension+
  # * +v+ the Dimension value for this segment
  def fix_dimension(v)
    @fixed_dimension_value = v
  end

  # extract DimensionValues from data for this segment. exclusive of +fix_dimension+
  # * +q+ SQL select string fragment for the dimension value
  def extract_dimension(q)
    @extract_dimension_query = q
  end

  # Narrow the datasource to extract this segment. Optional
  # * +proc+ a Proc of a single parameter, an ActiveRecord Scope to be narrowed
  # raises if no block is given
  def narrow(&proc)
    raise "no block!" unless proc
    @narrow_proc = proc
  end

  # Define an ordered list of all possible Dimension Values for the segment. Optional
  # * +proc+ a Proc of a single parameter, an ActiveRecord Scope which can be used
  #   to query for the values
  # raises if no block is given
  def values(&proc)
    raise "no block!" unless proc
    @values_proc = proc
  end

  # set a Proc to be used to convert Dimension values into labels. Optional
  # * +proc+ a Proc of a single parameter which will be called to convert Dimension values
  #   into labels
  # raises if no block is given
  def label(&proc)
    raise "no block!" unless proc
    @label_proc = proc
  end

  # define a cast to convert values into the desired datatype
  # * +c+ a keyword key for the casts in MDQuery::Model::CASTS
  # raises if +c+ is not a key of MDQuery::Model::CASTS
  def cast(c)
    raise "unknown cast: #{c.inspect}" unless MDQuery::Model::CASTS.key?(c)
    @value_cast = c
  end

  # set a Proc to be used to modify the measure-value in any query using this segment
  # * +measure+ a keyword describing the Measure
  # * +proc+ a Proc of a single parameter which will be used to transform the measure value
  # raises if no block is given. a later call for the same +measure+ replaces the earlier Proc
  def modify(measure, &proc)
    raise "no block!" unless proc
    @measure_modifiers[measure] = proc
  end

  private

  # * +key+ the segment key
  # * +proc+ the DSL block, evaluated with self set to this instance
  def initialize(key, &proc)
    # fail with the same message the other DSL classes use, instead of the
    # ArgumentError which instance_eval raises when it is given no block
    raise "no block!" unless proc
    @key = key
    @measure_modifiers = {}
    instance_eval(&proc)
  end

  # construct the DimensionSegmentModel described by this instance
  # * +dimension+ passed to the model as its +:dimension_model+
  def build(dimension)
    MDQuery::Model::DimensionSegmentModel.new(:dimension_model=>dimension,
                                              :key=>@key,
                                              :fixed_dimension_value=>@fixed_dimension_value,
                                              :extract_dimension_query=>@extract_dimension_query,
                                              :narrow_proc=>@narrow_proc,
                                              :values_proc=>@values_proc,
                                              :label_proc=>@label_proc,
                                              :value_cast=>@value_cast,
                                              :measure_modifiers=>@measure_modifiers)
  end
end
|
79
|
+
|
80
|
+
# DSL for describing a Dimension: a key, an optional label and an ordered
# list of segments. The constructor block is instance_eval'd against the new
# instance, so +segment+ and +label+ are the calls available inside it
class DimensionDSL
  # append a segment to the Dimension
  # * +key+ the segment key, should be unique in the Dimension
  # * +proc+ the DimensionSegmentDSL Proc
  # raises if no block is given
  def segment(key, &proc)
    raise "no block!" unless proc
    segment_dsl = DimensionSegmentDSL.new(key, &proc)
    @segments << segment_dsl
  end

  # set the Label for the Dimension
  # * +l+ the label, handed to the DimensionModel as its +:label+
  def label(l)
    @label = l
  end

  private

  # * +key+ the key of the Dimension
  # * +proc+ the DSL block, evaluated with self set to this instance
  def initialize(key, &proc)
    raise "no block!" unless proc
    @segments = []
    @key = key
    instance_eval(&proc)
  end

  # construct the DimensionModel described by this instance. the model is
  # created first because each segment is built with a reference to it; the
  # built segments are then written into the model before it is validated
  def build
    dimension = MDQuery::Model::DimensionModel.new(:key=>@key, :label=>@label)
    built_segments = @segments.map { |segment_dsl| segment_dsl.send(:build, dimension) }
    dimension.instance_variable_set(:@segment_models, built_segments)
    dimension.validate
    dimension
  end
end
|
113
|
+
|
114
|
+
# DSL object holding the description of a single Measure: its key, its
# definition and an optional cast. both methods are private
class MeasureDSL
  private

  # * +key+ the key identifying the Measure
  # * +definition+ the definition of the Measure
  # * +cast+ the cast for the Measure. Optional, defaults to nil
  def initialize(key, definition, cast=nil)
    @key, @definition, @cast = key, definition, cast
  end

  # construct the MeasureModel described by this instance
  def build
    attrs = {:key=>@key, :definition=>@definition, :cast=>@cast}
    MDQuery::Model::MeasureModel.new(attrs)
  end
end
|
130
|
+
|
131
|
+
# DSL for defining a Dataset: one datasource, a number of Measures and a
# number of Dimensions, where each Dimension consists of a number of Segments.
# The constructor block is instance_eval'd against the new instance
class DatasetDSL

  # define the datasource for the Dataset. may only be called once
  # * +scope+ an ActiveRecord scope, used as the basis for all region queries
  def source(scope)
    if @source
      raise "source already set"
    end
    @source = scope
  end

  # define a Dimension
  # * +key+ the key identifying the Dimension in the Dataset
  # * +proc+ a DimensionDSL Proc
  def dimension(key, &proc)
    dimension_dsl = DimensionDSL.new(key, &proc)
    @dimensions << dimension_dsl
  end

  # define a Measure
  # * +key+ the key identifying the Measure in the Dataset
  # * +definition+ the SQL fragment defining the measure
  # * +cast+ a symbol identifying a cast from MDQuery::Model::CASTS. Optional
  def measure(key, definition, cast=nil)
    measure_dsl = MeasureDSL.new(key, definition, cast)
    @measures << measure_dsl
  end

  private

  # * +proc+ the DSL block, evaluated with self set to this instance
  def initialize(&proc)
    raise "no block!" unless proc
    @dimensions = []
    @measures = []
    instance_eval(&proc)
  end

  # construct the DatasetModel described by this instance. Dimensions are
  # built before Measures, each through its private +build+ method
  def build
    MDQuery::Model::DatasetModel.new(
      :source=>@source,
      :dimension_models=>@dimensions.map { |dimension_dsl| dimension_dsl.send(:build) },
      :measure_models=>@measures.map { |measure_dsl| measure_dsl.send(:build) })
  end
end
|
174
|
+
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'mdquery/dataset'
|
3
|
+
require 'mdquery/util'
|
4
|
+
require 'date'
|
5
|
+
require 'time'
|
6
|
+
|
7
|
+
module MDQuery
|
8
|
+
module Model
|
9
|
+
|
10
|
+
# casts which can be used to transform queried values. each entry maps a
# cast key to a lambda of a single parameter which converts the value
CASTS = {
  # conversions which call the value's own to_* method
  :sym      => lambda { |value| value.to_sym },
  :int      => lambda { |value| value.to_i },
  :float    => lambda { |value| value.to_f },
  # conversions which hand the value to the parse method of the target class
  :date     => lambda { |value| Date.parse(value) },
  :datetime => lambda { |value| DateTime.parse(value) },
  :time     => lambda { |value| Time.parse(value) }
}
|
19
|
+
|
20
|
+
# Model of one segment of a Dimension. A segment either fixes the dimension
# value (+fixed_dimension_value+) or extracts it with a select fragment
# (+extract_dimension_query+), and may carry procs which narrow the scope,
# list the values, label the values and modify measure definitions
class DimensionSegmentModel
  attr_reader :dimension_model
  attr_reader :key
  attr_reader :fixed_dimension_value
  attr_reader :extract_dimension_query
  attr_reader :narrow_proc
  attr_reader :values_proc
  attr_reader :label_proc
  attr_reader :value_cast
  attr_reader :measure_modifiers

  # label used when no +label_proc+ is given: underscores become spaces
  # and each word is capitalized
  DEFAULT_LABEL_PROC = Proc.new do |value|
    value.to_s.gsub('_', ' ').split.map(&:capitalize).join(' ')
  end

  # * +attrs+ a map of attribute values. only the keys matching the readers
  #   above are permitted. the new instance is validated immediately
  def initialize(attrs)
    MDQuery::Util.assign_attributes(self,
                                    attrs,
                                    [:dimension_model, :key, :fixed_dimension_value, :extract_dimension_query, :narrow_proc, :values_proc, :label_proc, :value_cast, :measure_modifiers])
    validate
  end

  # raise unless the segment has a dimension_model, a key, exactly one of
  # fixed_dimension_value / extract_dimension_query, and a known value_cast
  # (if one is given). also defaults +measure_modifiers+ to an empty map
  def validate
    raise "no dimension_model!" unless dimension_model
    raise "no key!" unless key
    raise "only one of fix_dimension and extract_dimension can be given" if fixed_dimension_value && extract_dimension_query
    raise "one of fix_dimension or extract_dimension must be given" if !fixed_dimension_value && !extract_dimension_query
    # same check MeasureModel#validate applies to its cast: without it an
    # unknown value_cast would be silently ignored by do_cast
    raise "unknown cast: #{value_cast.inspect}" if value_cast && !CASTS.key?(value_cast)
    @measure_modifiers ||= {}
  end

  def inspect
    "#<DimensionSegment: key=#{key.inspect}, fixed_dimension_value=#{fixed_dimension_value.inspect}, extract_dimension_query=#{extract_dimension_query.inspect}, narrow_proc=#{narrow_proc.inspect}, label_proc=#{label_proc.inspect}, value_cast=#{value_cast.inspect}, measure_modifiers=#{measure_modifiers.inspect}>"
  end

  # apply +narrow_proc+ to +scope+, or return +scope+ untouched if there is none
  def do_narrow(scope)
    narrow_proc ? narrow_proc.call(scope) : scope
  end

  # convert +value+ with the cast named by +value_cast+, or return it
  # untouched if no cast is set
  def do_cast(value)
    cast_lambda = value_cast && CASTS[value_cast]
    cast_lambda ? cast_lambda.call(value) : value
  end

  # apply this segment's modifier for +measure_key+ to +measure_def+, or
  # return +measure_def+ untouched if the segment has no such modifier
  def do_modify(measure_key, measure_def)
    modifier = measure_modifiers[measure_key]
    modifier ? modifier.call(measure_def) : measure_def
  end

  # select fragment for the dimension value, aliased to the dimension key.
  # a fixed value is quoted by ActiveRecord, an extract query is used verbatim
  def select_string
    value_sql = if fixed_dimension_value
      ActiveRecord::Base.quote_value(fixed_dimension_value)
    else
      extract_dimension_query
    end
    "#{value_sql} as #{dimension_model.key}"
  end

  # group-by column: the dimension key as a String
  def group_by_column
    dimension_model.key.to_s
  end

  # list the Dimension values of this segment
  # * +scope+ the scope handed to +values_proc+, or narrowed and queried
  #   for distinct values when the segment has neither a fixed value nor
  #   a +values_proc+
  # NOTE(review): the fixed value is returned as a String and the result of
  # +values_proc+ is returned as is; only queried values go through do_cast.
  # confirm that is intended
  def get_values(scope)
    if fixed_dimension_value
      [fixed_dimension_value.to_s]
    elsif values_proc
      values_proc.call(scope)
    else
      narrowed_scope = do_narrow(scope)
      records = narrowed_scope.select("distinct #{select_string}").all
      records.map { |r| do_cast(r.send(dimension_model.key)) }
    end
  end

  # map of values to labels, using +label_proc+ or DEFAULT_LABEL_PROC
  def labels(values)
    labeller = label_proc || DEFAULT_LABEL_PROC
    Hash[values.map { |value| [value, labeller.call(value)] }]
  end
end
|
111
|
+
|
112
|
+
# Model of a Dimension: a key, an optional label and an ordered list of
# segment models
class DimensionModel
  attr_reader :key
  attr_reader :label
  attr_reader :segment_models

  # * +attrs+ a map of attribute values, permitted keys are +:key+,
  #   +:label+ and +:segment_models+
  def initialize(attrs)
    MDQuery::Util.assign_attributes(self, attrs, [:key, :label, :segment_models])
    # validate # don't call validate, it's called by the DSL builder
  end

  # raise unless the Dimension has a key and at least one segment model
  def validate
    raise "no key!" unless key
    raise "no segment_models!" if !segment_models || segment_models.empty?
  end

  def inspect
    "#<DimensionDefinition: key=#{key.inspect}, segment_models=#{segment_models.inspect}>"
  end

  # for each segment_model index emit one item per prefix, the prefix with
  # that index appended. e.g. if we have 2 segment_models and are given
  # prefixes [[0],[1]] then the result is [[0,0],[1,0],[0,1],[1,1]] : all
  # prefixes with index 0 appended, then all prefixes with index 1 appended.
  # used in the calculation of the cross-join of segment indexes across all
  # dimensions
  # * +prefixes+ a list of index lists. Optional, defaults to a single
  #   empty prefix. the prefixes themselves are not modified
  def index_list(prefixes=nil)
    bases = prefixes || [[]]
    (0...segment_models.length).map { |i| bases.map { |prefix| prefix + [i] } }.flatten(1)
  end
end
|
139
|
+
|
140
|
+
# Model of a Measure: a key, a definition and an optional cast naming an
# entry of CASTS
class MeasureModel
  attr_reader :key
  attr_reader :definition
  attr_reader :cast

  # * +attrs+ a map of attribute values, permitted keys are +:key+,
  #   +:definition+ and +:cast+. the new instance is validated immediately
  def initialize(attrs)
    permitted = [:key, :definition, :cast]
    MDQuery::Util.assign_attributes(self, attrs, permitted)
    validate
  end

  # raise unless the Measure has a key, a definition and, if a cast is
  # given, a cast which is a key of CASTS
  def validate
    raise "no key!" unless key
    raise "no definition!" unless definition
    if cast && !CASTS.key?(cast)
      raise "unknown cast: #{cast.inspect}"
    end
  end

  def inspect
    "#<MeasureDefinition: key=#{key.inspect}, definition=#{definition.inspect}, cast=#{cast.inspect}>"
  end

  # select fragment for the measure, aliased to the measure key. the
  # definition is passed through +do_modify+ of each segment in turn
  # * +region_segment_models+ the segment models of the region being queried
  def select_string(region_segment_models)
    modified_def = definition
    region_segment_models.each do |segment|
      modified_def = segment.do_modify(key, modified_def)
    end
    "#{modified_def} as #{key}"
  end

  # convert +value+ with the cast named by +cast+, or return it untouched
  # if no cast is set
  def do_cast(value)
    return value unless cast
    converter = CASTS[cast]
    converter ? converter.call(value) : value
  end
end
|
174
|
+
|
175
|
+
# Model of a Dataset: a source scope, a list of dimension models and a list
# of measure models. A region is one segment model from each dimension; one
# query is run per region
class DatasetModel
  attr_reader :source
  attr_reader :dimension_models
  attr_reader :measure_models

  # * +attrs+ a map of attribute values, permitted keys are +:source+,
  #   +:dimension_models+ and +:measure_models+. the new instance is
  #   validated immediately
  def initialize(attrs)
    MDQuery::Util.assign_attributes(self, attrs, [:source, :dimension_models, :measure_models])
    validate
  end

  # raise unless the Dataset has a source, at least one dimension model
  # and at least one measure model
  def validate
    raise "no source!" unless source
    raise "no dimension_models!" if !dimension_models || dimension_models.empty?
    raise "no measure_models!" if !measure_models || measure_models.empty?
  end

  def inspect
    "#<DatasetDefinition: dimension_models=#{dimension_models.inspect}, measure_models=#{measure_models.inspect}>"
  end

  # a list of tuples of dimension-segment indexes, each tuple specifying
  # one segment for each dimension. it is the cross-join of the dimension-segment indexes
  def region_segment_model_indexes
    dimension_models.reduce(nil) { |indexes, dimension_model| dimension_model.index_list(indexes) }
  end

  # a list of lists of dimension-segments
  def all_dimension_segment_models
    dimension_models.map(&:segment_models)
  end

  # given a list of dimension-segment indexes, one for each dimension,
  # retrieve a list of dimension-segments, one for each dimension,
  # specifying a region
  def region_segment_models(indexes)
    ds = all_dimension_segment_models
    # position in +indexes+ selects the dimension, the value selects the segment
    indexes.each_with_index.map { |segment_index, dimension_index| ds[dimension_index][segment_index] }
  end

  # call a block with a list of dimension-segments, one for each dimension.
  # the block is called once per region. raises if no block is given
  def with_regions(&proc)
    raise "no block!" unless proc
    region_segment_model_indexes.each do |indexes|
      proc.call(region_segment_models(indexes))
    end
  end

  # construct a query for a region
  # * +scope+ the scope to query, narrowed by each segment in turn
  # * +region_segment_models+ the segment models specifying the region
  # * +measure_models+ the measures to select
  # dimension select fragments come before measure select fragments, and
  # the query is grouped by the group-by column of every segment
  def construct_query(scope, region_segment_models, measure_models)
    narrowed_scope = region_segment_models.reduce(scope) { |narrowing, ds| ds.do_narrow(narrowing) }

    dimension_select_strings = region_segment_models.map(&:select_string)
    measure_select_strings = measure_models.map { |m| m.select_string(region_segment_models) }

    select_string = (dimension_select_strings + measure_select_strings).join(",")
    group_string = region_segment_models.map(&:group_by_column).join(",")

    narrowed_scope.select(select_string).group(group_string)
  end

  # extract data points from a list of ActiveRecord models
  # * +rows+ the models, which are sent each dimension key and measure key
  # returns one map per row, of dimension key and measure key to cast value.
  # dimension entries are written first, so a measure with the same key
  # overwrites the dimension value
  def extract(rows, region_segment_models, measure_models)
    rows.map do |row|
      point = {}
      region_segment_models.each do |ds|
        dimension_key = ds.dimension_model.key
        point[dimension_key] = ds.do_cast(row.send(dimension_key))
      end
      measure_models.each do |m|
        point[m.key] = m.do_cast(row.send(m.key))
      end
      point
    end
  end

  # run the queries defined by the DatasetModel: one query per region, with
  # the data points of all regions collected into a single list
  def do_queries
    data = []

    with_regions do |region_segment_models|
      q = construct_query(source, region_segment_models, measure_models)
      # append in place rather than building a new list for every region
      data.concat(extract(q.all, region_segment_models, measure_models))
    end

    data
  end

  # run the queries and put the results in a Dataset
  def collect
    MDQuery::Dataset::Dataset.new(self, do_queries)
  end
end
|
269
|
+
end
|
270
|
+
end
|
data/lib/mdquery/util.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module MDQuery
|
4
|
+
module Util
|
5
|
+
|
6
|
+
# assigns instance variable attributes
|
7
|
+
# to an object
|
8
|
+
# * +obj+ - the instance
|
9
|
+
# * +attrs+ - a map of {attr_name=>attr_value}
|
10
|
+
module_function
|
11
|
+
def assign_attributes(obj, attrs, permitted_keys = nil)
  # * +permitted_keys+ - optional list of allowed attr names. names are
  #   compared as Strings, and any attr outside the list is an error
  if permitted_keys
    given = attrs.keys.map(&:to_s).to_set
    allowed = permitted_keys.map(&:to_s).to_set
    extras = given - allowed
    unless extras.empty?
      raise "unknown keys: #{extras.to_a.inspect}. permitted keys are: #{permitted_keys.inspect}"
    end
  end

  # each attr becomes an instance variable of the same name on +obj+
  attrs.each { |name, value| obj.instance_variable_set("@#{name}", value) }
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|