mdquery 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,5 @@
1
+ script: "JRUBY_OPTS=-J-Djruby.objectspace.enabled=true rake"
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.3
5
+ - jruby
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "activerecord", ">= 3.1.0"
4
+
5
+ group :development do
6
+ gem "rake", "~> 0.9.2"
7
+ gem "rspec", "~> 2.8.0"
8
+ gem "rdoc", "~> 3.12"
9
+ gem "bundler", "~> 1.1.0"
10
+ gem "jeweler", "~> 1.8.3"
11
+ # gem "rcov", ">= 0"
12
+ gem 'rr', ">= 1.0.4"
13
+ end
@@ -0,0 +1,53 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activemodel (3.2.2)
5
+ activesupport (= 3.2.2)
6
+ builder (~> 3.0.0)
7
+ activerecord (3.2.2)
8
+ activemodel (= 3.2.2)
9
+ activesupport (= 3.2.2)
10
+ arel (~> 3.0.2)
11
+ tzinfo (~> 0.3.29)
12
+ activesupport (3.2.2)
13
+ i18n (~> 0.6)
14
+ multi_json (~> 1.0)
15
+ arel (3.0.2)
16
+ builder (3.0.0)
17
+ diff-lcs (1.1.3)
18
+ git (1.2.5)
19
+ i18n (0.6.0)
20
+ jeweler (1.8.3)
21
+ bundler (~> 1.0)
22
+ git (>= 1.2.5)
23
+ rake
24
+ rdoc
25
+ json (1.6.5)
26
+ json (1.6.5-java)
27
+ multi_json (1.1.0)
28
+ rake (0.9.2.2)
29
+ rdoc (3.12)
30
+ json (~> 1.4)
31
+ rr (1.0.4)
32
+ rspec (2.8.0)
33
+ rspec-core (~> 2.8.0)
34
+ rspec-expectations (~> 2.8.0)
35
+ rspec-mocks (~> 2.8.0)
36
+ rspec-core (2.8.0)
37
+ rspec-expectations (2.8.0)
38
+ diff-lcs (~> 1.1.2)
39
+ rspec-mocks (2.8.0)
40
+ tzinfo (0.3.32)
41
+
42
+ PLATFORMS
43
+ java
44
+ ruby
45
+
46
+ DEPENDENCIES
47
+ activerecord (>= 3.1.0)
48
+ bundler (~> 1.1.0)
49
+ jeweler (~> 1.8.3)
50
+ rake (~> 0.9.2)
51
+ rdoc (~> 3.12)
52
+ rr (>= 1.0.4)
53
+ rspec (~> 2.8.0)
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 mccraigmccraig
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,78 @@
1
+ = mdquery
2
+
3
+ {<img src="https://secure.travis-ci.org/mccraigmccraig/mdquery.png" />}[http://travis-ci.org/mccraigmccraig/mdquery]
4
+
5
+ Defines a DSL for specifying and executing segmented multi-dimensional queries on your ActiveRecord-3 models
6
+
7
+ == Installation
8
+
9
+ gem install mdquery
10
+
11
+ == Usage
12
+
13
+ A Dataset consists of some Measures over some Dimensions of a Source
14
+
15
+ * Measures are SQL aggregate functions to be computed, e.g. "count(*)" or "avg(age)"
16
+ * Each Dimension consists of 1 or more Segments, and each Segment either fixes a value for the Dimension or extracts values from the Source. Each Segment may narrow the Source scope according to the Dimension value or value-range for the Segment
17
+ * A Source is an ActiveRecord-3 Scope
18
+
19
+ Given the definition of the Dataset, it will query the Sources to extract data points, each of which will have a value for each Dimension and a value for each Measure
20
+
21
+ require 'mdquery'
22
+
23
+ Q = MDQuery.dataset do
24
+ source Foo
25
+
26
+ measure :count, "count(*)", :int
27
+
28
+ dimension :time do
29
+ label "Time"
30
+
31
+ segment(:all) do
32
+ fix_dimension :all
33
+ end
34
+
35
+ segment(:five_years) do
36
+ narrow{|scope| scope.where("foos.created_at > now() - interval '5 years'")}
37
+ extract_dimension "extract(year from foos.created_at)"
38
+ values{|scope| (Date.today.year-4..Date.today.year).to_a.map(&:to_s)}
39
+ label{|value| "Year: #{value}"}
40
+ end
41
+ end
42
+
43
+ dimension :users do
44
+ label "Users"
45
+
46
+ segment(:all) do
47
+ fix_dimension :all
48
+ end
49
+
50
+ segment(:by_type) do
51
+ extract_dimension "user_types.name"
52
+ narrow{|scope| scope.joins(:user_types)}
53
+ end
54
+ end
55
+ end
56
+
57
+ # run queries, collect data. returns an MDQuery::Dataset::Dataset
58
+ dataset = Q.collect
59
+
60
+ # retrieve dimension values, segment values and labels
61
+ dataset.dimensions[:time][:five_years].values # => ["2008", "2009", "2010", "2011", "2012"]
62
+ dataset.dimensions[:time].label_for("2008") #=> "Year: 2008"
63
+ dataset.datapoint({:time=>'2008', :users=>'all'}, :count) # => 100 # 100 users in 2008
64
+
65
+ == Contributing to mdquery
66
+
67
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
68
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
69
+ * Fork the project.
70
+ * Start a feature/bugfix branch.
71
+ * Commit and push until you are happy with your contribution.
72
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
73
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
74
+
75
+ == Copyright
76
+
77
+ Copyright (c) 2012 mccraigmccraig. See LICENSE.txt for
78
+ further details.
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build script: gem packaging via Jeweler, specs via RSpec, API docs via RDoc.

require 'rubygems'
require 'bundler'
begin
  # load both the runtime and the development gem groups from the Gemfile
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mdquery"
  gem.homepage = "http://github.com/mccraigmccraig/mdquery"
  gem.license = "MIT"
  gem.summary = %Q{simple multi-dimensional queries on top of active-record-3}
  gem.description = %Q{provides a DSL for simply specifying and executing segmented multi-dimensional queries on your active-record-3 models}
  gem.email = "mccraigmccraig@gmail.com"
  gem.authors = ["mccraigmccraig"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# RSpec::Core::RakeTask.new(:rcov) do |spec|
#   spec.pattern = 'spec/**/*_spec.rb'
#   spec.rcov = true
# end

# running plain `rake` runs the specs
task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # the VERSION file may end with a newline; strip it so it does not
  # leak into the generated documentation title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "mdquery #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.3.0
@@ -0,0 +1,13 @@
1
+ $: << File.expand_path('..', __FILE__) if !$:.include?(File.expand_path('..', __FILE__))
2
+ require 'mdquery/dsl'
3
+
4
# a DSL for specifying analytic queries
module MDQuery
  class << self
    # Define a Dataset with the DSL.
    # * +definition+ the block handed to DatasetDSL
    #
    # The block is passed to a new MDQuery::DSL::DatasetDSL and the
    # result of that object's +build+ is returned. +build+ is invoked
    # through +send+, exactly as before, so it works even when +build+
    # is not public.
    def dataset(&definition)
      dsl = MDQuery::DSL::DatasetDSL.new(&definition)
      dsl.send(:build)
    end
  end
end
@@ -0,0 +1,283 @@
1
+ require 'mdquery/util'
2
+
3
+ module MDQuery
4
+ module Dataset
5
+
6
# describes a value on a segment of a dimension
class DimensionValue
  # DimensionSegment this value belongs to
  attr_reader :dimension_segment

  # the value
  attr_reader :value

  # Optional label for the value
  attr_reader :label

  # * +dimension_segment+ the DimensionSegment this value belongs to (required)
  # * +value+ the value itself (required, must not be nil)
  # * +label+ optional label for the value
  #
  # Raises a RuntimeError when a required argument is missing.
  def initialize(dimension_segment, value, label)
    @dimension_segment = dimension_segment
    @value = value
    @label = label
    validate
  end

  # Raises a RuntimeError unless both a dimension_segment and a value
  # are present.
  def validate
    raise "no dimension_segment!" if !dimension_segment
    # only nil counts as "missing": +false+ is accepted as a value
    raise "no value!" if value.nil?
  end

  def inspect
    "#<DimensionValue: value=#{value.inspect}, label=#{label.inspect}>"
  end
end
33
+
34
# describes a segment of a dimension, a segment being some part of
# the dimension value line. Dimension values should not be present
# in more than one segment of a dimension, or results will be
# unexpected, though it's fine for aggregate values to be present
# which cover the same range as other values. e.g. having values of
# "jan", "feb", "march"... in one segment and "q1","q2","q3","q4"
# in another segment is fine
class DimensionSegment
  # Dimension this Segment belongs to
  attr_reader :dimension

  # key of segment, unique within Dimension
  attr_reader :key

  # ordered list of DimensionValues in segment
  attr_reader :dimension_values

  # ordered list of all values in segment
  attr_reader :values

  # * +model+ must respond to +key+, +get_values(source)+ and +labels(values)+
  # * +dimension+ the owning Dimension; its +dataset.model.source+ is
  #   handed to +model.get_values+
  #
  # Raises a RuntimeError ("no dimension!", "no key!" or "no values!")
  # when a required part is missing.
  def initialize(model, dimension)
    # the dimension is dereferenced to find the source, so a missing
    # one has to be reported before anything else happens
    raise "no dimension!" if !dimension

    @dimension = dimension
    @key = model.key
    @values = model.get_values(dimension.dataset.model.source)

    # validate before the values are mapped, so that missing values are
    # reported as "no values!" rather than as a NoMethodError on nil
    validate

    label_index = model.labels(@values)
    @dimension_values = @values.map{|v| DimensionValue.new(self, v, label_index[v]) }
    @dimension_value_index = Hash[@dimension_values.map{|dv| [dv.value, dv] }]
  end

  # Raises a RuntimeError unless dimension, key and values are all present.
  def validate
    raise "no dimension!" if !dimension
    raise "no key!" if !key
    raise "no values!" if !values
  end

  def inspect
    "#<DimensionSegment: key=#{key.inspect}, dimension_values=#{dimension_values.inspect}>"
  end

  # retrieve a DimensionValue describing the given +value+, or nil if
  # the segment has no such value
  def dimension_value_for(value)
    @dimension_value_index[value]
  end

  # retrieve a DimensionValue describing the given +value+
  # (shorthand for +dimension_value_for+)
  def [](value)
    dimension_value_for(value)
  end

  # retrieve a label describing the given +value+, or nil if the
  # segment has no such value
  def label_for(value)
    (dv = dimension_value_for(value)) && dv.label
  end
end
92
+
93
# describes a Dimension consisting of one or more segments
class Dimension
  # Dataset this Dimension belongs to
  attr_reader :dataset

  # key for this Dimension
  attr_reader :key

  # Optional label of the Dimension
  attr_reader :label

  # ordered list of one or more DimensionSegments
  attr_reader :segments

  # an ordered list of values for the dimension. May be static or
  # extracted from the data source, depending on DimensionSegment
  # definitions. It is the concatenation of the +values+ from each
  # DimensionSegment in the Dimension
  attr_reader :values

  # * +model+ must respond to +key+, +label+ and +segment_models+
  # * +dataset+ the Dataset this Dimension belongs to
  #
  # Raises a RuntimeError ("no dataset!", "no key!" or "no segments!")
  # when a required part is missing.
  def initialize(model, dataset)
    # segments are handed this Dimension while they are built, so a
    # missing dataset is reported before any segment is constructed
    raise "no dataset!" if !dataset

    @dataset = dataset
    @key = model.key
    @label = model.label

    @segments = (model.segment_models || []).map{|sm| DimensionSegment.new(sm, self) }

    # validate before concatenating: an empty segment list is reported
    # as "no segments!" instead of failing later on a nil
    validate

    @segment_index = Hash[@segments.map{|s| [s.key, s] }]
    @values = concat_all(@segments.map(&:values))
    @dimension_value_index = Hash[dimension_values.map{|dv| [dv.value, dv] }]
  end

  # Raises a RuntimeError unless dataset, key and at least one segment
  # are present.
  def validate
    raise "no dataset!" if !dataset
    raise "no key!" if !key
    raise "no segments!" if !segments || segments.empty?
  end

  def inspect
    "#<Dimension: key=#{key.inspect}, label=#{label.inspect}, segments=#{segments.inspect}>"
  end

  # lookup a segment by +key+; nil if there is no such segment
  def segment(key)
    @segment_index[key]
  end

  # lookup a segment by +key+ (shorthand for +segment+)
  def [](key)
    segment(key)
  end

  # return an ordered list of values for 0 or more segments.
  # * +segment_keys+ a list of segment keys. if nil or empty, the method
  #   returns +values+, otherwise it returns the concatenation of
  #   +values+ for each identified segment
  def values_for_segments(segment_keys)
    if segment_keys && !segment_keys.empty?
      concat_all(segment_keys.map{|sk| segment(sk)}.map(&:values))
    else
      values
    end
  end

  # return an ordered list of DimensionValues for 0 or more segments.
  # * +segment_keys+ a list of segment keys. if nil or empty, the method
  #   returns all DimensionValues for all segments, otherwise it returns
  #   the concatenation of DimensionValues for each identified segment
  def dimension_values_for_segments(segment_keys)
    if segment_keys && !segment_keys.empty?
      concat_all(segment_keys.map{|sk| segment(sk)}.map(&:dimension_values))
    else
      dimension_values
    end
  end

  # ordered list of the DimensionValues of all segments
  def dimension_values
    concat_all(segments.map(&:dimension_values))
  end

  # the DimensionValue describing +value+ or nil
  def dimension_value_for(value)
    @dimension_value_index[value]
  end

  # the label for the +value+ or nil
  def label_for(value)
    (dv = dimension_value_for(value)) && dv.label
  end

  private

  # concatenate a list of lists with +; an empty list gives [] where a
  # bare reduce(&:+) would give nil
  def concat_all(lists)
    lists.reduce(&:+) || []
  end
end
184
+
185
# describes a Measure computed from the source data over the Dimensions
class Measure
  # the +dataset+ this Measure belongs to
  attr_reader :dataset

  # the +key+ identifying this Measure
  attr_reader :key

  # the SQL fragment definition of the Measure
  attr_reader :definition

  # * +model+ must respond to +key+ and +definition+
  # * +dataset+ the Dataset this Measure belongs to
  #
  # Raises a RuntimeError when the dataset, key or definition is missing.
  def initialize(model, dataset)
    @dataset = dataset
    @key = model.key
    @definition = model.definition
    validate
  end

  # Raises a RuntimeError unless dataset and key are present and the
  # definition is neither nil nor blank.
  def validate
    raise "no dataset" if !dataset
    raise "no key!" if !key
    # \A and \z anchor the whole string. ^ and $ anchor each line, which
    # would reject a multi-line definition that merely contains a blank line
    raise "no definition!" if !definition || definition =~ /\A\s*\z/
  end

  def inspect
    "#<Measure: key=#{key.inspect}, definition=#{definition.inspect}>"
  end
end
213
+
214
# a Dataset is defined over a number of Dimensions with a number of Measures.
#
class Dataset
  # the +Model+ describing how the +Dataset+ is to be assembled
  attr_reader :model

  # a list of points. each point is a Hash with a value for each +Dimension+ and a value for each +Measure+.
  # keys are as given in the +Dimension+ and +Measure+ objects
  attr_reader :data

  # a Hash of +Dimensions+ keyed by their +keys+
  attr_reader :dimensions

  # a Hash of +Measures+ keyed by their +keys+
  attr_reader :measures

  # index of points from +data+, where key is a Hash of all Dimension {key=>value} pairs, and value is all Measure {key=>value} pairs
  attr_reader :indexed_data

  # * +model+ must respond to +measure_models+ and +dimension_models+
  # * +data+ the list of points
  #
  # Raises a RuntimeError ("no model!", "no data!", "no dimensions!" or
  # "no measures!") when a required part is missing.
  def initialize(model, data)
    # the model is dereferenced straight away, so a missing one has to
    # be reported before the measures and dimensions are built
    raise "no model!" if !model

    @model = model
    @data = data

    @measures = keyed(model.measure_models.map{|mm| Measure.new(mm, self) })
    @dimensions = keyed(model.dimension_models.map{|dm| Dimension.new(dm, self) })

    validate
    index
  end

  # Raises a RuntimeError unless model, data, at least one dimension
  # and at least one measure are present.
  def validate
    raise "no model!" if !model
    raise "no data!" if !data
    raise "no dimensions!" if !dimensions || dimensions.empty?
    raise "no measures!" if !measures || measures.empty?
  end

  def inspect
    "#<Dataset: dimensions=#{dimensions.inspect}, measures=#{measures.inspect}, data=#{data.inspect}>"
  end

  # retrieve a datapoint given a hash of {dimension_key=>dimension_values}
  # * +dimension_values+ a Hash with a value for every Dimension key
  # * +measure+ the key of the Measure to read
  # returns nil when no point was indexed under +dimension_values+
  def datapoint(dimension_values, measure)
    d = @indexed_data[dimension_values]
    d[measure] if d
  end

  private

  # build a Hash of +items+ keyed by each item's +key+
  def keyed(items)
    Hash[items.map{|item| [item.key, item] }]
  end

  # the part of +point+ that identifies it: its value for every Dimension key
  def index_key(point)
    Hash[dimensions.keys.map{|k| [k, point[k]]}]
  end

  # a copy of +point+ without the Dimension keys, i.e. what remains
  # once the dimension values are taken out. +point+ is not modified
  def index_data(point)
    remainder = point.clone
    remainder.delete_if{|k, _| dimensions.key?(k) }
    remainder
  end

  # (re)build +indexed_data+ from +data+
  def index
    @indexed_data = {}
    data.each do |point|
      @indexed_data[index_key(point)] = index_data(point)
    end
    @indexed_data
  end
end
282
+ end
283
+ end