aggrobot 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +8 -8
  2. data/Gemfile +3 -0
  3. data/README.md +11 -1
  4. data/aggrobot.gemspec +10 -2
  5. data/aggrobot_test +0 -0
  6. data/asmemory +0 -0
  7. data/lib/aggrobot.rb +7 -3
  8. data/lib/aggrobot/aggregator.rb +18 -5
  9. data/lib/aggrobot/aggrobot.rb +24 -13
  10. data/lib/aggrobot/errors.rb +6 -0
  11. data/lib/aggrobot/query_planner.rb +8 -19
  12. data/lib/aggrobot/query_planner/agg.rb +28 -0
  13. data/lib/aggrobot/query_planner/bucketed_groups_query_planner.rb +18 -15
  14. data/lib/aggrobot/query_planner/default_query_planner.rb +27 -8
  15. data/lib/aggrobot/query_planner/group_limit_query_planner.rb +30 -10
  16. data/lib/aggrobot/query_planner/parameters_validator.rb +33 -0
  17. data/lib/aggrobot/sql_functions.rb +23 -50
  18. data/lib/aggrobot/sql_functions/common.rb +65 -0
  19. data/lib/aggrobot/sql_functions/mysql.rb +6 -0
  20. data/lib/aggrobot/sql_functions/pgsql.rb +6 -0
  21. data/lib/aggrobot/sql_functions/sqlite.rb +6 -0
  22. data/lib/aggrobot/version.rb +1 -1
  23. data/pbcopy +1 -0
  24. data/spec/factories.rb +0 -0
  25. data/spec/factories/users.rb +4 -0
  26. data/spec/spec_helper.rb +49 -7
  27. data/spec/support/factory_robot/distribution_evaluator.rb +44 -0
  28. data/spec/support/factory_robot/factory_robot.rb +108 -0
  29. data/spec/support/user.rb +2 -0
  30. data/spec/unit/aggrobot/query_planners/bucketed_groups_query_planner_spec.rb +42 -55
  31. data/spec/unit/aggrobot/query_planners/default_query_planner_spec.rb +45 -21
  32. data/spec/unit/aggrobot/query_planners/group_limit_query_planner_spec.rb +69 -70
  33. data/spec/unit/aggrobot/sql_functions_spec.rb +20 -19
  34. metadata +97 -7
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YjhjNWE3NWU3M2IxOWQxZWIxMjIwYTVjMmQ5Mjc5ZDFmYTRhZTI5Yg==
4
+ MDBhY2U5ZGY4YWYzZTNmMjE5MWFlNmQ4OGYyYWY5Y2M4YWE3MWUwMw==
5
5
  data.tar.gz: !binary |-
6
- OTc4YjYzNjlmMjdlMjIzNzY4MTcwZTA5YWUwNjk2MTkyMzM4M2Q5MQ==
6
+ MjZhMzBmMTFhZTg1MmE5ZWZhMGNlMjI5MGQwZTY2NWYxMTk0MWM0MA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ODk4NjY1NGJjZTIxMWZjMWZhODk4OTI4YTBhYmFiMzRjMDM1NjkzOTM4YTJi
10
- OTdkYTA1MjEwNTBhODk2OGQwODU0NDIyNzA5YzI5OWQ3OTJlN2I4MWI4ZjMw
11
- NzczYjY1OWRlYzY4ZDI5ZjE1MDcwMmZkZTdkYTYyMzAwYzgyNTE=
9
+ ZjhhMzcyMmU0ZjU4MThhNmY4NGI5MDI5MmQ4YWNkMzMwMDg1M2Y4ZDlhM2Yw
10
+ MzNjNWNkY2RlNWUzNDc2YTViNTVlM2I1YjUxYjJkN2M2OTM3ODEwMjJmNjQ0
11
+ NjE3YTc1YTZkM2FmMjZlYjQ4YmFkYjljY2IxYWYxNzAzNGNlNjE=
12
12
  data.tar.gz: !binary |-
13
- YTA2MjhlMzI0ZjU1YzY4NWRlYWE3Y2VhMjRiOGJlNzA5YjljMWJhYWM1MGY4
14
- MjZmZTA0MjEwOGNkNmZiMDc4ZWJmMmIwOWNjNTZkMjQ5MTgxYTFkMWRmZTcx
15
- NjZjNDA1YzcxODgzYTRkZmY2N2VkZjU4OGJlMDI3YTU3YWI4ZTM=
13
+ ZjY4Y2IwYzBkZmJkNDg3MjRmMzBkNGM1ZGNjODkxNjgyNDdhYTdkMmI5YTEz
14
+ NDI4N2ZhZGUzMjZjMWJmNmQ1MWRhYTkwOTA5OWM3ZGI2ZWVlYTI0NjI4Njdh
15
+ YmMyZTMxMTBhNGI4MDg0YzZhOTllNmRiY2MwMmVkYjM2MDVhMWI=
data/Gemfile CHANGED
@@ -1,8 +1,11 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in aggrobot.gemspec
4
+ gem 'activerecord'
5
+
4
6
  gemspec
5
7
 
8
+
6
9
  group :test do
7
10
  gem 'guard-rspec', require: false
8
11
  gem 'growl', require: false
data/README.md CHANGED
@@ -1,6 +1,15 @@
1
1
  # Aggrobot
2
2
 
3
- TODO: Write a gem description
3
+ Aggrobot is an aggregation framework in Ruby. It provides a powerful DSL to perform aggregations over large datasets. It has been tested to work with MySQL, Postgres and SQLite.
4
+
5
+ Many other features are provided:
6
+ * Bucketing over data ranges. For example, grouping orders over price ranges like 100-200, 200-300 and 300+
7
+ * Grouping over top **n groups**. For example, group the top 2 selling products based on quantity, while grouping the rest of the products in a single bucket of **others**
8
+ * Sub-aggregations (avg, sum, add, multiply, divide, percent, etc) on columns are also provided.
9
+
10
+ All the aggregations are calculated in the database and only the aggregated data is sent over to Ruby, to keep it performant. This is much faster and requires far less memory than performing the aggregations directly in Ruby.
11
+
12
+ Aggrobot also allows nested aggregations, and each level of aggregation can be passed around as a code block and used in higher-level aggregations. This provides a great amount of code reuse.
4
13
 
5
14
  ## Installation
6
15
 
@@ -19,6 +28,7 @@ Or install it yourself as:
19
28
  ## Usage
20
29
 
21
30
  TODO: Write usage instructions here
31
+ TODO: Write example usage here
22
32
 
23
33
  ## Contributing
24
34
 
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ['shadab.ansari@gmail.com']
11
11
  spec.description = %q{Easy and performant aggregation for rails}
12
12
  spec.summary = %q{Rails aggregation library}
13
- spec.homepage = ''
13
+ spec.homepage = 'https://github.com/shadabahmed/aggrobot'
14
14
  spec.license = 'MIT'
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -18,9 +18,17 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_runtime_dependency 'rails', '~> 4.0'
21
+ spec.add_dependency 'rails', '>=4.0'
22
+ spec.add_dependency 'activerecord', '>=4.0'
23
+ spec.add_dependency 'arel'
24
+
25
+ spec.required_ruby_version = '>= 1.9.3'
26
+
22
27
  spec.add_development_dependency 'bundler', '~> 1.3'
28
+ spec.add_development_dependency 'sqlite3'
23
29
  spec.add_development_dependency 'rake'
24
30
  spec.add_development_dependency('rspec', ['~> 2.14.1'])
25
31
  spec.add_development_dependency('rdoc')
32
+ spec.add_development_dependency('factory_girl')
33
+ spec.add_development_dependency('database_cleaner')
26
34
  end
Binary file
Binary file
@@ -1,3 +1,4 @@
1
+ require 'active_record'
1
2
  require 'aggrobot/railtie'
2
3
  require 'active_support/core_ext/module/delegation.rb'
3
4
  require 'active_support/core_ext/hash/indifferent_access'
@@ -9,11 +10,13 @@ require 'aggrobot/query_planner'
9
10
  require 'aggrobot/aggregator'
10
11
  require 'aggrobot/aggrobot'
11
12
 
12
-
13
13
  module Aggrobot
14
14
 
15
- DEFAULT_GROUP_BY = SqlFunctions.sanitize('aggrobot_default_group')
15
+ DEFAULT_GROUP_BY = 1 #'aggrobot_default_group'
16
16
 
17
+ # when
18
+ # collection is given, starts aggregation by evaluating block on collection
19
+ # collection is not given, starts aggregation by evaluating block on full data set
17
20
  def self.start(collection = nil, block_arg = nil, block_opts = nil, &block)
18
21
  block_opts ||= block_arg if block
19
22
  block = block_arg if block_arg && block_arg.respond_to?(:call)
@@ -29,8 +32,9 @@ module Aggrobot
29
32
  block
30
33
  end
31
34
 
35
+ # sets ROUNDING_DIGITS to percent_precision, default is 2
32
36
  def self.setup(app)
33
- SqlFunctions.const_set(:ROUNDING_DIGITS, app.config.aggrobot.percent_precision || 2)
37
+ SQLFunctions.setup(app.config.aggrobot.percent_precision)
34
38
  end
35
39
 
36
40
  end
@@ -11,7 +11,7 @@ module Aggrobot
11
11
  self.collection(collection) if collection
12
12
  end
13
13
 
14
-
14
+ # returns hash of group label(s) as key and actual column(s) as value
15
15
  def group_labels(map = nil, &block)
16
16
  if map || block
17
17
  if map.is_a?(Hash)
@@ -24,16 +24,25 @@ module Aggrobot
24
24
  end
25
25
  end
26
26
 
27
+ # returns collection if it is ActiveRecord::Relation or ActiveRecord::Base
28
+ # raises error when collection is none of the above
29
+ # returns @collection otherwise (which is nil)
27
30
  def collection(values = nil)
28
31
  if values
29
- raise_error 'Collection should be an ActiveRecord::Relation or ActiveRecord::Base' unless
30
- [ActiveRecord::Relation, ActiveRecord::Base].any?{|m| values.is_a?(m) }
32
+ if !values.is_a?(ActiveRecord::Relation) && values < ActiveRecord::Base
33
+ values = values.unscoped
34
+ end
35
+ raise_error 'Collection should be an ActiveRecord::Relation or ActiveRecord::Base' unless values.is_a?(ActiveRecord::Relation)
31
36
  @collection = values
32
37
  else
33
38
  @collection
34
39
  end
35
40
  end
36
41
 
42
+ # when
43
+ # : opts is nil, groups by group on @collection
44
+ # : opts is a map as {limit_to: limit}, creates groups by group on @collection with a limit
45
+ # : opts is a map as {buckets: [list_items]}, creates groups by [list_items] on @collection
37
46
  def group_by(group, opts = nil)
38
47
  raise_error "Group_by takes only symbol or a string as argument" unless group.is_a?(Symbol) or group.is_a?(String)
39
48
  @query_planner = QueryPlanner.create(@collection, group, opts)
@@ -50,7 +59,11 @@ module Aggrobot
50
59
  end
51
60
  end
52
61
 
53
- def set(name = nil, opts)
62
+ # creates attribute map
63
+ # when:
64
+ # given as hash, sets all keys as attributes to show and values as columns to fetch
65
+ # given as list (of 2 items), first item is key to show and second item is column to fetch
66
+ def select(name = nil, opts)
54
67
  if opts.is_a? Hash
55
68
  @attribute_mapping.merge!(opts)
56
69
  elsif name && opts
@@ -74,7 +87,7 @@ module Aggrobot
74
87
  private
75
88
 
76
89
  def extra_columns
77
- @attribute_mapping.values
90
+ @attribute_mapping.map{|k, v| "#{v} as #{k}"}
78
91
  end
79
92
 
80
93
  def extra_attributes
@@ -1,13 +1,13 @@
1
1
  module Aggrobot
2
2
  class Aggrobot
3
-
4
- include SqlFunctions
5
3
  include Helper
6
4
 
7
- delegate :collection, :group_by, :default_groups, :override, :set, :group_labels, :to => :@aggregator
5
+ delegate :collection, :group_by, :default_groups, :override, :select, :group_labels, :to => :@aggregator
6
+ delegate :count,:sanitize, :desc, :asc, :count, :unique_count, :max, :min, :sum, :avg, :average,
7
+ :group_collect, :percent, :multiply, :divide, :to => SQLFunctions
8
8
 
9
- def run(block)
10
- instance_eval(&block)
9
+ def run(block, args = {})
10
+ instance_exec(args, &block)
11
11
  end
12
12
 
13
13
  def initialize(caller_context, collection = nil)
@@ -19,18 +19,21 @@ module Aggrobot
19
19
  @caller_context.send method, *args, &block
20
20
  end
21
21
 
22
+ # creates top level data structure as hash and call block to process further
22
23
  def hash(collection = nil, opts = {}, &block)
23
24
  self.collection(collection) if collection
24
25
  @top_level_object = ActiveSupport::HashWithIndifferentAccess.new
25
26
  proceed(block, opts)
26
27
  end
27
28
 
29
+ # creates top level data structure as array and call block to process further
28
30
  def list(collection = nil, opts = {}, &block)
29
31
  self.collection(collection) if collection
30
32
  @top_level_object = []
31
33
  proceed(block, opts)
32
34
  end
33
35
 
36
+ # sets default/current values to top_level_object hash/list
34
37
  def default(default_val = nil, &block)
35
38
  block = block_from_args(default_val, block, false)
36
39
  default_val = ::Aggrobot.start(collection, &block) if block
@@ -39,6 +42,7 @@ module Aggrobot
39
42
 
40
43
  alias set_current_value default
41
44
 
45
+ # sets default group attrs as a hash, if opts is passed as param
42
46
  def default_group_attrs(opts = nil)
43
47
  if opts
44
48
  raise_error 'Arguments must be a hash' unless opts.is_a?(Hash)
@@ -48,10 +52,15 @@ module Aggrobot
48
52
  end
49
53
  end
50
54
 
55
+ alias default_values default_group_attrs
56
+
57
+ # returns top level object hash/list
51
58
  def current_value
52
59
  @top_level_object
53
60
  end
54
61
 
62
+ # starts aggrobot on collection and block, when block is given and
63
+ # adds {attribute: value} pair to the top level object
55
64
  def attr(attribute, value = nil, &block)
56
65
  block = block_from_args(value, block, false)
57
66
  raise_error 'attr can only be used with a hash type' unless @top_level_object.is_a?(Hash)
@@ -61,27 +70,29 @@ module Aggrobot
61
70
  @top_level_object[attribute] = value
62
71
  end
63
72
 
73
+ # gets attribute's value from top level object, only works when top level is hash
64
74
  def get_attr(attribute)
65
75
  @top_level_object[attribute]
66
76
  end
67
77
 
68
- def collect_each_group_attributes
69
- each_group do |attr|
70
- attr
71
- end
72
- end
73
-
74
78
  def each_group(block_arg = nil, &block)
75
- block = block_from_args(block_arg, block)
79
+ block = block_from_args(block_arg, block, false)
76
80
  @aggregator.yield_results do |attrs, group_name, sub_collection|
77
81
  attrs = @default_group_attrs.merge(attrs) if @default_group_attrs
78
82
  block_value = ::Aggrobot.start(sub_collection) do
79
- instance_exec(attrs, &block)
83
+ if block
84
+ instance_exec(attrs, &block)
85
+ else
86
+ attrs
87
+ end
80
88
  end
81
89
  update_top_level_obj(group_name, block_value)
82
90
  end
83
91
  end
84
92
 
93
+ alias iterate each_group
94
+ alias recurse each_group
95
+
85
96
  def evaluate(block_arg = nil, &block)
86
97
  block = block_from_args(block_arg, block)
87
98
  list(&block).first
@@ -0,0 +1,6 @@
1
+ module Aggrobot
2
+ class Error < StandardError
3
+ end
4
+ class ArgumentError < ::ArgumentError
5
+ end
6
+ end
@@ -1,36 +1,25 @@
1
+ require 'aggrobot/query_planner/parameters_validator'
1
2
  require 'aggrobot/query_planner/default_query_planner'
2
3
  require 'aggrobot/query_planner/group_limit_query_planner'
3
4
  require 'aggrobot/query_planner/bucketed_groups_query_planner'
4
5
 
6
+
7
+ # plans queries in a Aggrobot
5
8
  module Aggrobot::QueryPlanner
6
9
 
10
+ # creates query object
7
11
  def self.create(collection, group_by, opts = nil)
8
12
  case
9
13
  when opts.nil?
10
14
  DefaultQueryPlanner.new(collection, group_by)
11
15
  when opts.key?(:limit_to)
16
+ # GROUP attrs by 'group_by' with limit
12
17
  GroupLimitQueryPlanner.new(collection, group_by, opts)
13
18
  when opts.key?(:buckets)
19
+ # GROUP attrs by 'group_by' in buckets of opts[:buckets], e.g. 1..100, 101..200 etc
14
20
  BucketedGroupsQueryPlanner.new(collection, group_by, opts)
21
+ else
22
+ raise ArgumentError.new "Invalid options to group_by : #{opts}"
15
23
  end
16
24
  end
17
-
18
- module ParametersValidator
19
- def self.validate_options(opts, required_parameters, optional_parameters)
20
- params = opts.keys
21
- # raise errors for required parameters
22
- raise_argument_error(opts, required_parameters, optional_parameters) unless (required_parameters - params).empty?
23
- # raise errors if any extra arguments given
24
- raise_argument_error(opts, required_parameters, optional_parameters) unless (params - required_parameters - optional_parameters).empty?
25
- end
26
-
27
- def self.raise_argument_error(opts, required_parameters, optional_parameters)
28
- raise ArgumentError, <<-ERR
29
- Wrong arguments given - #{opts}
30
- Required parameters are #{required_parameters}
31
- Optional parameters are #{optional_parameters}
32
- ERR
33
- end
34
- end
35
-
36
25
  end
@@ -0,0 +1,28 @@
1
+
2
+ route = Route.first
3
+ Infinity = (1.0/0)
4
+
5
+ def los_labels
6
+ ->(label){
7
+ range = label.match(/(?<begin>\-?\d+)?(?<separator>[\-\<])?(?<end>\-?\d+)?/)
8
+ case range[:separator]
9
+ when '<'
10
+ "#{range[:end]}+"
11
+ when '-'
12
+ "#{range[:begin]} to #{range[:end]}"
13
+ else
14
+ label
15
+ end
16
+ }
17
+ end
18
+
19
+ los_agg = Aggrobot.start(route.passengers) do |attrs|
20
+ hash do
21
+ group_by :length_of_stay, limit_to: 4, sort_by: count, other_group: 'others'
22
+ set advance: :ap, ptype: :pax_type, pct: percent(count, attrs[:count])
23
+
24
+ each_group do |attrs|
25
+ attrs
26
+ end
27
+ end
28
+ end
@@ -3,19 +3,21 @@ module Aggrobot
3
3
  class BucketedGroupsQueryPlanner < DefaultQueryPlanner
4
4
 
5
5
  def initialize(collection, group, opts = {})
6
- ParametersValidator.validate_options(opts, [:buckets], [:keep_empty])
6
+ required_params = [:buckets]
7
+ optional_params = [:keep_empty]
8
+ validate_options(opts, required_params, optional_params)
7
9
  raise_error 'Need to set group first' unless group
8
10
  super(collection, group)
9
11
  create_query_map(opts[:buckets])
10
12
  @keep_empty = opts[:keep_empty]
11
13
  end
12
14
 
13
- def sub_query(group_name)
14
- @query_map[group_name]
15
+ def sub_query(group_value)
16
+ @query_map[group_value]
15
17
  end
16
18
 
17
19
  def query_results(extra_cols = [])
18
- return empty_default_groups if collection_is_none?
20
+ return empty_buckets if collection_is_none?
19
21
  results = collect_query_results(extra_cols)
20
22
  results.reject! { |r| r[1] == 0 } unless @keep_empty
21
23
  results
@@ -24,24 +26,25 @@ module Aggrobot
24
26
  private
25
27
 
26
28
  def collect_query_results(extra_cols)
27
- columns = ['', SqlFunctions.count] + extra_cols
28
- @query_map.collect do |group_name, query|
29
- sanitized_group_name = SqlFunctions.sanitize(group_name)
30
- columns[0] = sanitized_group_name
31
- results = query.group(sanitized_group_name).limit(1).pluck(*columns).first
32
- @query_map[group_name] = @query_map[group_name].none unless results
33
- results || [group_name, 0]
29
+ columns = [SQLFunctions.count] + extra_cols
30
+ @query_map.collect do |group_value, query|
31
+ results = query.limit(1).pluck(*columns).flatten
32
+ if results[0] == 0
33
+ @query_map[group_value] = @query_map[group_value].none
34
+ results = [0]
35
+ end
36
+ results.unshift(group_value)
34
37
  end
35
38
  end
36
39
 
37
- def empty_default_groups
40
+ def empty_buckets
38
41
  @keep_empty ? @query_map.keys.collect { |k| [k, 0] } : []
39
42
  end
40
43
 
41
- def create_query_map(groups)
44
+ def create_query_map(buckets)
42
45
  @query_map = {}
43
- groups.each do |group|
44
- @query_map[group.to_s] = @collection.where(@group => group)
46
+ buckets.each do |bucket|
47
+ @query_map[bucket] = @collection.where(group_condition(bucket))
45
48
  end
46
49
  end
47
50
 
@@ -1,23 +1,43 @@
1
1
  module Aggrobot
2
2
  module QueryPlanner
3
3
  class DefaultQueryPlanner
4
- include Aggrobot::Helper
4
+ include ParametersValidator
5
5
 
6
- def initialize(collection, group)
7
- @collection, @group = collection, group
6
+ def initialize(collection, group = DEFAULT_GROUP_BY)
7
+ @collection, @group = validate_and_extract_relation(collection), group
8
8
  end
9
9
 
10
- def sub_query(group_name)
11
- @group == DEFAULT_GROUP_BY ? @collection : @collection.where(@group => group_name)
10
+ def sub_query(group_value)
11
+ if @group == DEFAULT_GROUP_BY
12
+ @collection
13
+ else
14
+ @collection.where(group_condition(group_value))
15
+ end
12
16
  end
13
17
 
14
18
  def query_results(extra_cols = [])
15
19
  return [] if collection_is_none?
16
- columns = [@group, SqlFunctions.count] + extra_cols
17
- results_query.pluck(*columns)
20
+ if @group.is_a? Array
21
+ columns = @group + [SQLFunctions.count] + extra_cols
22
+ results_query.pluck(*columns).collect do |result_row|
23
+ [result_row[0..(@group.count - 1)]] + result_row[@group.count..-1]
24
+ end
25
+ else
26
+ columns = [@group, SQLFunctions.count] + extra_cols
27
+ results_query.pluck(*columns)
28
+ end
18
29
  end
19
30
 
20
31
  protected
32
+
33
+ def group_condition(group_value)
34
+ if @group.is_a?(Array)
35
+ Hash[@group.zip(group_value)]
36
+ else
37
+ {@group => group_value}
38
+ end
39
+ end
40
+
21
41
  def results_query
22
42
  @result_query ||= @collection.group(@group)
23
43
  end
@@ -25,7 +45,6 @@ module Aggrobot
25
45
  def collection_is_none?
26
46
  @collection.extending_values.include?(ActiveRecord::NullRelation)
27
47
  end
28
-
29
48
  end
30
49
  end
31
50
  end