mdquery 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +53 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +78 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/mdquery.rb +13 -0
- data/lib/mdquery/dataset.rb +283 -0
- data/lib/mdquery/dsl.rb +176 -0
- data/lib/mdquery/model.rb +270 -0
- data/lib/mdquery/util.rb +21 -0
- data/mdquery.gemspec +78 -0
- data/spec/mdquery/dataset_spec.rb +232 -0
- data/spec/mdquery/dsl_spec.rb +144 -0
- data/spec/mdquery/model_spec.rb +473 -0
- data/spec/mdquery/util_spec.rb +25 -0
- data/spec/mdquery_spec.rb +35 -0
- data/spec/spec_helper.rb +11 -0
- metadata +149 -0
data/lib/mdquery/dsl.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
require 'mdquery/model'
|
2
|
+
|
3
|
+
module MDQuery
|
4
|
+
module DSL
|
5
|
+
|
6
|
+
# DSL for describing a DimensionSegment
|
7
|
+
class DimensionSegmentDSL

  # Fix the Dimension value for this segment. Exclusive of +extract_dimension+
  # (the exclusivity is enforced by DimensionSegmentModel#validate at build time)
  # * +v+ the Dimension value for this segment
  def fix_dimension(v)
    @fixed_dimension_value = v
  end

  # Extract Dimension values from data for this segment. Exclusive of +fix_dimension+
  # (the exclusivity is enforced by DimensionSegmentModel#validate at build time)
  # * +q+ SQL select string fragment for the dimension value
  def extract_dimension(q)
    @extract_dimension_query = q
  end

  # Narrow the datasource to extract this segment. Optional
  # * +proc+ a Proc of a single parameter, an ActiveRecord Scope to be narrowed
  # Raises if no block is given
  def narrow(&proc)
    raise "no block!" if !proc
    @narrow_proc = proc
  end

  # Define an ordered list of all possible Dimension Values for the segment. Optional
  # * +proc+ a Proc of a single parameter, an ActiveRecord Scope which can be used
  #   to query for the values
  # Raises if no block is given
  def values(&proc)
    raise "no block!" if !proc
    @values_proc = proc
  end

  # Set a Proc to be used to convert Dimension values into labels. Optional
  # * +proc+ a Proc of a single parameter which will be called to convert Dimension values
  #   into labels
  # Raises if no block is given
  def label(&proc)
    raise "no block!" if !proc
    @label_proc = proc
  end

  # Define a cast to convert values into the desired datatype
  # * +c+ a keyword key for the casts in MDQuery::Model::CASTS
  # Raises if +c+ is not a key of MDQuery::Model::CASTS
  def cast(c)
    raise "unknown cast: #{c.inspect}" if !MDQuery::Model::CASTS.key?(c)
    @value_cast = c
  end

  # Set a Proc to be used to modify the measure-value in any query using this segment
  # * +measure+ a keyword describing the Measure
  # * +proc+ a Proc of a single parameter which will be used to transform the measure value
  # Raises if no block is given
  def modify(measure, &proc)
    raise "no block!" if !proc
    @measure_modifiers[measure] = proc
  end

  private

  # Evaluate the DSL block against a fresh segment description
  # * +key+ the segment key
  # * +proc+ the DSL block, evaluated with +self+ as the receiver
  def initialize(key, &proc)
    # fail with the same message as the other DSL classes rather than
    # letting instance_eval raise an ArgumentError for the missing block
    raise "no block!" if !proc
    @key = key
    @measure_modifiers = {}
    instance_eval(&proc)
  end

  # Build the DimensionSegmentModel described by this DSL instance
  # * +dimension+ the DimensionModel which will own the segment
  def build(dimension)
    MDQuery::Model::DimensionSegmentModel.new(:dimension_model=>dimension,
                                              :key=>@key,
                                              :fixed_dimension_value=>@fixed_dimension_value,
                                              :extract_dimension_query=>@extract_dimension_query,
                                              :narrow_proc=>@narrow_proc,
                                              :values_proc=>@values_proc,
                                              :label_proc=>@label_proc,
                                              :value_cast=>@value_cast,
                                              :measure_modifiers=>@measure_modifiers)
  end
end
|
79
|
+
|
80
|
+
# DSL for describing a Dimension consisting of an ordered list of segments
|
81
|
+
class DimensionDSL
  # Add a segment to the Dimension; segments are kept in definition order
  # * +key+ the segment key, should be unique in the Dimension
  # * +proc+ the DimensionSegmentDSL Proc
  # Raises if no block is given
  def segment(key, &proc)
    raise "no block!" unless proc
    @segments << DimensionSegmentDSL.new(key, &proc)
  end

  # Set the label for the Dimension
  # * +l+ the label
  def label(l)
    @label = l
  end

  private

  # Evaluate the DSL block against a fresh Dimension description
  # * +key+ the Dimension key
  # * +proc+ the DSL block, evaluated with +self+ as the receiver
  def initialize(key, &proc)
    raise "no block!" unless proc
    @key = key
    @segments = []
    instance_eval(&proc)
  end

  # Build the DimensionModel and its segment models. The DimensionModel is
  # created first because every segment model needs a reference to it; the
  # segment list is then injected and the finished model validated
  def build
    dimension = MDQuery::Model::DimensionModel.new(:key => @key, :label => @label)
    segment_models = @segments.map { |segment_dsl| segment_dsl.send(:build, dimension) }
    dimension.instance_variable_set(:@segment_models, segment_models)
    dimension.validate
    dimension
  end
end
|
113
|
+
|
114
|
+
# DSL for defining Measures
|
115
|
+
class MeasureDSL
  private

  # Record the description of a Measure
  # * +key+ the key identifying the Measure
  # * +definition+ the SQL fragment defining the Measure
  # * +cast+ a key of MDQuery::Model::CASTS. Optional
  def initialize(key, definition, cast=nil)
    @key, @definition, @cast = key, definition, cast
  end

  # Build the MeasureModel described by this DSL instance
  def build
    attrs = {:key => @key, :definition => @definition, :cast => @cast}
    MDQuery::Model::MeasureModel.new(attrs)
  end
end
|
130
|
+
|
131
|
+
# DSL for defining a Dataset with a number of Measures over a number of Dimensions
|
132
|
+
# where each Dimension consists of a number of Segments
|
133
|
+
class DatasetDSL

  # Define the datasource for the Dataset. May only be called once
  # * +scope+ an ActiveRecord scope, used as the basis for all region queries
  # Raises if a source has already been set
  def source(scope)
    raise "source already set" if @source
    @source = scope
  end

  # Define a Dimension; Dimensions are kept in definition order
  # * +key+ the key identifying the Dimension in the Dataset
  # * +proc+ a DimensionDSL Proc
  def dimension(key, &proc)
    @dimensions << DimensionDSL.new(key, &proc)
  end

  # Define a Measure; Measures are kept in definition order
  # * +key+ the key identifying the Measure in the Dataset
  # * +definition+ the SQL fragment defining the measure
  # * +cast+ a symbol identifying a cast from MDQuery::Model::CASTS. Optional
  def measure(key, definition, cast=nil)
    @measures << MeasureDSL.new(key, definition, cast)
  end

  private

  # Evaluate the DSL block against a fresh Dataset description
  # * +proc+ the DSL block, evaluated with +self+ as the receiver
  def initialize(&proc)
    raise "no block!" unless proc
    @dimensions = []
    @measures = []
    instance_eval(&proc)
  end

  # Build the DatasetModel: the Dimension models are built first, then the
  # Measure models, and both are handed to the DatasetModel with the source
  def build
    attrs = {
      :source => @source,
      :dimension_models => @dimensions.map { |dimension_dsl| dimension_dsl.send(:build) },
      :measure_models => @measures.map { |measure_dsl| measure_dsl.send(:build) }
    }
    MDQuery::Model::DatasetModel.new(attrs)
  end
end
|
174
|
+
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'mdquery/dataset'
|
3
|
+
require 'mdquery/util'
|
4
|
+
require 'date'
|
5
|
+
require 'time'
|
6
|
+
|
7
|
+
module MDQuery
|
8
|
+
module Model
|
9
|
+
|
10
|
+
# Casts which can be used to transform queried values. Each cast is a lambda
# of a single parameter, the raw value.
#
# * +:sym+, +:date+, +:datetime+ and +:time+ pass +nil+ through unchanged
#   rather than raising
# * +:date+, +:datetime+ and +:time+ also accept values which are already
#   Date, DateTime or Time instances and convert between them rather than
#   handing a non-String to +parse+, which raises a TypeError
# * +:int+ and +:float+ use +to_i+ / +to_f+, so +nil+ becomes 0 / 0.0
CASTS = {
  :sym => lambda { |v| v.nil? ? nil : v.to_sym },
  :int => lambda { |v| v.to_i },
  :float => lambda { |v| v.to_f },
  :date => lambda { |v|
    case v
    when nil then nil
    # DateTime is a subclass of Date so it must be matched first
    when DateTime, Time then v.to_date
    when Date then v
    else Date.parse(v)
    end
  },
  :datetime => lambda { |v|
    case v
    when nil then nil
    when DateTime then v
    when Date, Time then v.to_datetime
    else DateTime.parse(v)
    end
  },
  :time => lambda { |v|
    case v
    when nil then nil
    when Time then v
    when Date then v.to_time
    else Time.parse(v)
    end
  }
}
|
19
|
+
|
20
|
+
# Model of one segment of a Dimension: either a fixed Dimension value or a
# SQL fragment extracting Dimension values, with optional narrowing of the
# datasource, value enumeration, labelling, casting and per-measure modifiers
class DimensionSegmentModel
  attr_reader :dimension_model
  attr_reader :key
  attr_reader :fixed_dimension_value
  attr_reader :extract_dimension_query
  attr_reader :narrow_proc
  attr_reader :values_proc
  attr_reader :label_proc
  attr_reader :value_cast
  attr_reader :measure_modifiers

  # label used when no label_proc is given: underscores become spaces and
  # each word is capitalized, e.g. :foo_bar => "Foo Bar"
  DEFAULT_LABEL_PROC = Proc.new do |value|
    value.to_s.gsub('_', ' ').split.map(&:capitalize).join(' ')
  end

  # * +attrs+ a map of attribute values. permitted keys are the names of the
  #   attribute readers of this class; unknown keys raise
  def initialize(attrs)
    MDQuery::Util.assign_attributes(self,
                                    attrs,
                                    [:dimension_model, :key, :fixed_dimension_value, :extract_dimension_query, :narrow_proc, :values_proc, :label_proc, :value_cast, :measure_modifiers])
    validate
  end

  # raise unless the segment is fully and consistently described, and
  # default measure_modifiers to an empty map
  def validate
    raise "no dimension_model!" if !dimension_model
    raise "no key!" if !key
    raise "only one of fix_dimension and extract_dimension can be given" if fixed_dimension_value && extract_dimension_query
    raise "one of fix_dimension or extract_dimension must be given" if !fixed_dimension_value && !extract_dimension_query
    # an unknown cast would otherwise be silently ignored by do_cast;
    # reject it as MeasureModel#validate does
    raise "unknown cast: #{value_cast.inspect}" if value_cast && !CASTS.key?(value_cast)
    @measure_modifiers ||= {}
  end

  def inspect
    "#<DimensionSegment: key=#{key.inspect}, fixed_dimension_value=#{fixed_dimension_value.inspect}, extract_dimension_query=#{extract_dimension_query.inspect}, narrow_proc=#{narrow_proc.inspect}, label_proc=#{label_proc.inspect}, value_cast=#{value_cast.inspect}, measure_modifiers=#{measure_modifiers.inspect}>"
  end

  # apply narrow_proc to the scope, or return the scope unchanged if there
  # is no narrow_proc
  def do_narrow(scope)
    narrow_proc ? narrow_proc.call(scope) : scope
  end

  # apply the value_cast from CASTS to a value, or return the value
  # unchanged if there is no value_cast
  def do_cast(value)
    cast_lambda = CASTS[value_cast] if value_cast
    cast_lambda ? cast_lambda.call(value) : value
  end

  # apply this segment's modifier for a measure to the measure definition,
  # or return the definition unchanged if there is no modifier
  # * +measure_key+ the key of the measure
  # * +measure_def+ the measure definition to be modified
  def do_modify(measure_key, measure_def)
    modifier = measure_modifiers[measure_key]
    modifier ? modifier.call(measure_def) : measure_def
  end

  # the SQL select fragment for the Dimension value, aliased to the
  # Dimension key. a fixed value is quoted; an extract query is used verbatim
  def select_string
    if fixed_dimension_value
      "#{ActiveRecord::Base.quote_value(fixed_dimension_value)} as #{dimension_model.key}"
    else
      "#{extract_dimension_query} as #{dimension_model.key}"
    end
  end

  # the column to group by: the Dimension key as a String
  def group_by_column
    dimension_model.key.to_s
  end

  # the list of Dimension values for this segment: the fixed value, or the
  # result of values_proc, or the distinct values queried from the narrowed scope
  # * +scope+ the scope to be queried
  def get_values(scope)
    if fixed_dimension_value
      # NOTE(review): the fixed value is returned as a String and is not passed
      # through do_cast, unlike the queried values below - confirm this is intended
      [fixed_dimension_value.to_s]
    elsif values_proc
      values_proc.call(scope)
    else
      narrowed_scope = do_narrow(scope)
      records = narrowed_scope.select("distinct #{select_string}").all
      records.map{|r| r.send(dimension_model.key)}.map{|v| do_cast(v)}
    end
  end

  # map of values to labels, using label_proc or DEFAULT_LABEL_PROC
  def labels(values)
    labeller = label_proc || DEFAULT_LABEL_PROC
    values.each_with_object({}) do |value, labels|
      labels[value] = labeller.call(value)
    end
  end
end
|
111
|
+
|
112
|
+
# Model of a Dimension: a key, an optional label and an ordered list of
# segment models
class DimensionModel
  attr_reader :key
  attr_reader :label
  attr_reader :segment_models

  # * +attrs+ a map of attribute values. permitted keys are :key, :label
  #   and :segment_models
  def initialize(attrs)
    permitted = [:key, :label, :segment_models]
    MDQuery::Util.assign_attributes(self, attrs, permitted)
    # validate is deliberately not called here, it's called by the DSL builder
  end

  # raise unless there is a key and at least one segment model
  def validate
    raise "no key!" unless key
    raise "no segment_models!" if segment_models.nil? || !segment_models || segment_models.empty?
  end

  def inspect
    "#<DimensionDefinition: key=#{key.inspect}, segment_models=#{segment_models.inspect}>"
  end

  # for each segment_model index emit one item per prefix, the prefix extended
  # with that index. e.g. if we have 2 segment_models and are given prefixes
  # [[0],[1]] then the result is [[0,0],[1,0],[0,1],[1,1]]. with no prefixes
  # the result is one single-element item per segment_model index. used in the
  # calculation of the cross-join of segment indexes across all dimensions
  def index_list(prefixes=nil)
    bases = prefixes || [[]]
    (0...segment_models.length).flat_map do |segment_index|
      bases.map { |prefix| prefix.clone.push(segment_index) }
    end
  end
end
|
139
|
+
|
140
|
+
# Model of a Measure: a key, the SQL fragment defining it and an optional cast
class MeasureModel
  attr_reader :key
  attr_reader :definition
  attr_reader :cast

  # * +attrs+ a map of attribute values. permitted keys are :key,
  #   :definition and :cast
  def initialize(attrs)
    permitted = [:key, :definition, :cast]
    MDQuery::Util.assign_attributes(self, attrs, permitted)
    validate
  end

  # raise unless there is a key and a definition, and any cast is a key of CASTS
  def validate
    raise "no key!" unless key
    raise "no definition!" unless definition
    raise "unknown cast: #{cast.inspect}" if cast && !CASTS.key?(cast)
  end

  def inspect
    "#<MeasureDefinition: key=#{key.inspect}, definition=#{definition.inspect}, cast=#{cast.inspect}>"
  end

  # the SQL select fragment for the measure, aliased to the measure key. the
  # definition is passed through do_modify of each region segment model in turn
  # * +region_segment_models+ the segment models defining the region
  def select_string(region_segment_models)
    modified_def = definition
    region_segment_models.each do |segment_model|
      modified_def = segment_model.do_modify(key, modified_def)
    end
    "#{modified_def} as #{key}"
  end

  # apply the cast from CASTS to a value, or return the value unchanged if
  # there is no cast
  def do_cast(value)
    cast_lambda = cast ? CASTS[cast] : nil
    return value unless cast_lambda
    cast_lambda.call(value)
  end
end
|
174
|
+
|
175
|
+
# Model of a Dataset: a source scope, a list of Dimension models and a list
# of Measure models. Runs one query per region, where a region is a choice of
# one segment from each Dimension
class DatasetModel
  attr_reader :source
  attr_reader :dimension_models
  attr_reader :measure_models

  # * +attrs+ a map of attribute values. permitted keys are :source,
  #   :dimension_models and :measure_models
  def initialize(attrs)
    permitted = [:source, :dimension_models, :measure_models]
    MDQuery::Util.assign_attributes(self, attrs, permitted)
    validate
  end

  # raise unless there is a source, at least one dimension model and at
  # least one measure model
  def validate
    raise "no source!" unless source
    raise "no dimension_models!" if !dimension_models || dimension_models.empty?
    raise "no measure_models!" if !measure_models || measure_models.empty?
  end

  def inspect
    "#<DatasetDefinition: dimension_models=#{dimension_models.inspect}, measure_models=#{measure_models.inspect}>"
  end

  # a list of tuples of dimension-segment indexes, each tuple specifying
  # one segment for each dimension. it is the cross-join of the
  # dimension-segment indexes, built by threading the tuples so far through
  # index_list of each dimension model in turn
  def region_segment_model_indexes
    tuples = nil
    dimension_models.each do |dimension_model|
      tuples = dimension_model.index_list(tuples)
    end
    tuples
  end

  # a list of lists of dimension-segments, one list for each dimension
  def all_dimension_segment_models
    dimension_models.map { |dimension_model| dimension_model.segment_models }
  end

  # given a list of dimension-segment indexes, one for each dimension,
  # retrieve a list of dimension-segments, one for each dimension,
  # specifying a region
  def region_segment_models(indexes)
    segments_by_dimension = all_dimension_segment_models
    (0...indexes.length).map do |dimension_index|
      segments_by_dimension[dimension_index][indexes[dimension_index]]
    end
  end

  # call a block once for each region, with a list of dimension-segments,
  # one for each dimension
  def with_regions(&proc)
    region_segment_model_indexes.each do |indexes|
      region = region_segment_models(indexes)
      proc.call(region)
    end
  end

  # construct a query for a region: the scope is narrowed by each segment in
  # turn, then the dimension and measure select fragments are selected and
  # the result grouped by the dimension columns
  def construct_query(scope, region_segment_models, measure_models)
    narrowed_scope = scope
    region_segment_models.each do |segment_model|
      narrowed_scope = segment_model.do_narrow(narrowed_scope)
    end

    select_fragments = region_segment_models.map { |segment_model| segment_model.select_string }
    select_fragments += measure_models.map { |measure_model| measure_model.select_string(region_segment_models) }

    group_columns = region_segment_models.map { |segment_model| segment_model.group_by_column }

    narrowed_scope.select(select_fragments.join(",")).group(group_columns.join(","))
  end

  # extract data points from a list of ActiveRecord models. each data point
  # is a map of dimension keys and measure keys to their cast values
  def extract(rows, region_segment_models, measure_models)
    rows.map do |row|
      cells = region_segment_models.map do |segment_model|
        dimension_key = segment_model.dimension_model.key
        {dimension_key => segment_model.do_cast(row.send(dimension_key))}
      end
      cells += measure_models.map do |measure_model|
        {measure_model.key => measure_model.do_cast(row.send(measure_model.key))}
      end

      cells.inject { |point, cell| point.merge(cell) }
    end
  end

  # run the queries defined by the DatasetModel, one for each region, and
  # return the data points of all regions as a single list
  def do_queries
    data = []
    with_regions do |region|
      query = construct_query(source, region, measure_models)
      data.concat(extract(query.all, region, measure_models))
    end
    data
  end

  # run the queries and put the results in a Dataset
  def collect
    MDQuery::Dataset::Dataset.new(self, do_queries)
  end
end
|
269
|
+
end
|
270
|
+
end
|
data/lib/mdquery/util.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module MDQuery
|
4
|
+
module Util
|
5
|
+
|
6
|
+
# assigns instance variable attributes
|
7
|
+
# to an object
|
8
|
+
# * +obj+ - the instance
|
9
|
+
# * +attrs+ - a map of {attr_name=>attr_value}
|
10
|
+
module_function
|
11
|
+
# Assign each entry of +attrs+ to the instance variable of the same name on +obj+
# * +obj+ - the instance
# * +attrs+ - a map of {attr_name=>attr_value}
# * +permitted_keys+ - a list of permitted attr_names. Optional. names are
#   compared as Strings, so Symbols and Strings are interchangeable
# Raises if +permitted_keys+ is given and +attrs+ has a key which is not in it.
# Returns +attrs+
def assign_attributes(obj, attrs, permitted_keys = nil)
  if permitted_keys
    permitted_names = permitted_keys.map { |permitted_key| permitted_key.to_s }
    unknown_keys = attrs.keys.map { |attr_key| attr_key.to_s }.uniq - permitted_names
    unless unknown_keys.empty?
      raise "unknown keys: #{unknown_keys.inspect}. permitted keys are: #{permitted_keys.inspect}"
    end
  end

  attrs.each { |attr, val| obj.instance_variable_set("@#{attr}", val) }
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|