reduceable 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
4
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
# One explicit HTTPS source. The :rubygems and :gemcutter symbol shortcuts
# used previously are deprecated aliases for the public registry, and
# declaring two global sources for the same registry is redundant.
source "https://rubygems.org"

gem "mongo_mapper", "~>0.9.0"
gem "bson_ext"
gem "activesupport", "~>3.0.7"

# Specify your gem's dependencies in the .gemspec file
gemspec
@@ -0,0 +1,99 @@
1
+ Reduceable
2
+ =========
3
+
4
+ This is a module for MongoMapper which provides an easy way to add in some simple
5
+ map/reduce functions to your data. If you have time series data and you want to show
6
+ some sort of counter per date or time, then this should do it.
7
+
8
+ Concept
9
+ -------
10
+ You have a bunch of objects in your MongoDB. You need to get some basic information
11
+ about them such as:
12
+ Simple aggregation of documents per key,
13
+ Finding an average of a value,
14
+ Counting the number of documents that contain a key.
15
+
16
+ You've probably read you can do this sort of stuff with MongoDB's map/reduce
17
+ functionality, maybe you already know exactly how that works or maybe you don't
18
+ really have a clue. Every guide I've seen for MongoMapper recommends you execute
19
+ the map/reduce calculation every single time it's accessed, and they all demand
20
+ that you write your own map and reduce functions.
21
+
22
+ Here are some use cases
23
+
24
+ ```ruby
25
+ # Count how many times each tag is used
26
+ Model.count_by(:tag, query = {})
27
+ # Sum all the weights of the different types of wrestlers
28
+ Model.sum_of(:weight, :wrestler_type, query = {})
29
+ ```
30
+
31
+ Coming Soon
32
+ ----------
33
+ + mongoid support
34
+ + Sum by composite index
35
+ + Averages
36
+ + More Unit Tests :(
37
+
38
+ Installation
39
+ ------------
40
+ ```
41
+ gem install reduceable
42
+ # or
43
+ sudo gem install reduceable
44
+ ```
45
+
46
+ Usage
47
+ -----
48
+ ```ruby
49
+ require 'mongo_mapper'
50
+ require 'reduceable'
51
+
52
+ MongoMapper.database = 'my_database_name'
53
+
54
+ class BlogPost
55
+ include MongoMapper::Document
56
+ include Reduceable
57
+
58
+ key :article_body, String
59
+ key :categories, Array
60
+ key :time_posted, Time
61
+ key :article_length, Integer
62
+ end
63
+
64
+ # Insert some data
65
+
66
+ BlogPost.count_by(:categories).to_a.each do |x|
67
+ puts "You have posted #{x['value']} posts from category #{x['_id']}"
68
+ end
69
+ BlogPost.sum_of(:article_length, :categories).to_a.each do |x|
70
+ puts "You have written #{x['value']} characters in category #{x['_id']}"
71
+ end
72
+ ```
73
+
74
+ See example.rb
75
+
76
+ ```ruby
77
+ # require the example model
78
+ require './example.rb' #=> true
79
+ # setup some base data
80
+ setup #=> #<Test _id: BSON: ......
81
+ #
82
+ # Calculate how many times each tag is used
83
+ # You will use a similar map/reduce for a tag cloud
84
+ Test.count_by(:tags).to_a
85
+ #=> [{"_id"=>"alternative", "value"=>1.0}, {"_id"=>"book", "value"=>5.0}, {"_id"=>"classical", "value"=>1.0}, {"_id"=>"fantasy", "value"=>2.0}, {"_id"=>"fiction", "value"=>2.0}, {"_id"=>"music", "value"=>4.0}, {"_id"=>"non-fiction", "value"=>1.0}, {"_id"=>"pop", "value"=>1.0}, {"_id"=>"rock", "value"=>1.0}]
86
+
87
+ # Sum up the sale_amounts per tag
88
+ Test.sum_of(:sale_amount, :tags).to_a
89
+
90
+ # Sum up the sale_amounts per tag where tags contains 'book'
91
+ Test.sum_of(:sale_amount, :tags, {:tags => 'book'}).to_a
92
+ # you can optionally pass in a mongo query that limits the initial dataset being
93
+ # fed to the map function.
94
+ ```
95
+
96
+ For such a small collection the speed benefits aren't present, but once you get to
97
+ several hundred thousand records, recreating the map_reduce collection on every call
98
+ really slows things down. Reduceable solves that problem.
99
+
@@ -0,0 +1,15 @@
1
require 'bundler'
Bundler::GemHelper.install_tasks

require "rspec/core/rake_task"

# `rake core` (also the default task) runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:core) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  # The former "-fs" flag selected the same formatter as "--format documentation"
  # through a legacy alias; it is dropped so only the explicit long form remains.
  spec.rspec_opts = %w(--color --format documentation)
end

# Load any extra task definitions in a stable, sorted order.
Dir["tasks/*.rake"].sort.each do |rake_file|
  load rake_file
end

task :default => [:core]
@@ -0,0 +1,29 @@
1
+ require 'mongo_mapper'
2
+ require 'reduceable'
3
+
4
+ MongoMapper.database = 'mr_test'
5
+
6
# Empties the Test collection and seeds it with nine sample sales.
# Returns the last record created.
def setup
  Test.collection.remove

  # Each row is [tags, sale_amount]; rows are inserted in this order.
  sample_sales = [
    [['book', 'fiction'],      40],
    [['music', 'rock'],        20.5],
    [['music', 'pop'],         20.5],
    [['music', 'classical'],   20.5],
    [['music', 'alternative'], 20.5],
    [['book', 'fiction'],      40],
    [['book', 'fantasy'],      40],
    [['book', 'fantasy'],      40],
    [['book', 'non-fiction'],  40]
  ]

  created = sample_sales.map do |tags, amount|
    Test.create(:date => Date.today, :tags => tags, :sale_amount => amount)
  end
  created.last
end
19
+
20
# Sample sale document used by the example script.
class Test
  include MongoMapper::Document
  include Reduceable

  # Day of the sale (or Time YYYYMMDD format).
  key :date, String
  # Value of the sale.
  key :sale_amount, Float
  # A list of tags you might want to query on, e.g. query on 'book'
  # to find out how many book sales you have per day.
  key :tags, Array
end
@@ -0,0 +1,121 @@
1
+ require 'active_support'
2
+ require 'mongo_mapper'
3
+ require 'base64'
4
+
5
+ class MrStatus
6
+ include MongoMapper::Document
7
+ safe
8
+
9
+ # mr collection to query
10
+ key :collection_name, String, :unique => true
11
+ # the base class the mr results are calculated from
12
+ key :base_class, String
13
+ key :status, Boolean # true = dirty, clean = false
14
+
15
+ end
16
+
17
# Mixin for MongoMapper documents that adds two map/reduce helpers,
# +count_by+ and +sum_of+, and caches their output collections.
#
# Each distinct (action, query) pair gets its own output collection whose
# freshness is tracked in an MrStatus record. Saving any document of the
# including class marks all of that class's cached collections dirty, and the
# next helper call rebuilds the one it needs.
module Reduceable
  extend ActiveSupport::Concern

  included do
    # Any save can change the aggregates, so flag the cached results as stale.
    after_save :mr_dirty!
  end

  module ClassMethods
    # Sums +property+ grouped by +index+.
    #
    # property - name of the key to add up.
    # index    - name of the key to group by; when it is declared as an Array
    #            key, every element of the array is treated as its own group.
    # query    - optional Mongo query limiting the documents fed to the map.
    #
    # Returns the result of calling +find+ on the output collection.
    def sum_of(property, index, query={})
      collection = mr_collection_name("sum_of_#{property}_by_#{index}", query)
      build(collection, sum_map(property, index), sum_reduce, query).find
    end

    # JavaScript map function emitting (index value, property value) pairs.
    def sum_map(property, index)
      index = index.to_s
      if mr_array_key?(index)
        "function(){var amount = this.#{property};this.#{index}.forEach(function(value){emit(value, amount);});}"
      else
        "function(){emit(this.#{index}, this.#{property});}"
      end
    end

    # JavaScript reduce function that adds up all emitted values for a key.
    def sum_reduce
      <<-REDUCE
      function(key, values) {
        var total = 0;
        for (var i=0; i<values.length; i++){
          total += values[i];
        }
        return total;
      }
      REDUCE
    end

    # Counts documents grouped by +index+ (per element for Array keys).
    #
    # index - name of the key to group by.
    # query - optional Mongo query limiting the documents fed to the map.
    #
    # Returns the result of calling +find+ on the output collection.
    def count_by(index, query={})
      collection = mr_collection_name("count_by_#{index}", query)
      build(collection, count_map(index), count_reduce, query).find
    end

    # JavaScript map function emitting (key value, 1) pairs.
    def count_map(key)
      # Not sure how to handle hashes yet
      key = key.to_s
      if mr_array_key?(key)
        "function(){this.#{key}.forEach(function(value){emit(value, 1);});}"
      else
        "function(){emit(this.#{key}, 1);}"
      end
    end

    # Counting is summing the emitted 1s, so the sum reducer is reused.
    def count_reduce
      sum_reduce
    end

    # Builds the name of the output collection for +action+ and +query+.
    #
    # The top-level query keys are sorted before encoding so that the same
    # query written with its keys in a different order maps to the same cached
    # collection. Nested hashes inside the query are not reordered.
    def mr_collection_name(action, query = {})
      ordered_query = Hash[query.sort_by { |field, _| field.to_s }]
      encoded_query = Base64.urlsafe_encode64(ordered_query.to_s)
      (self.to_s.downcase + "#{action}_mr_" + encoded_query).gsub('=','_').gsub(':','_')
    end

    # Returns the collection holding the results for +collection+, running the
    # map/reduce first when no clean cached copy exists.
    #
    # collection - output collection name (see mr_collection_name).
    # map        - JavaScript map function source.
    # reduce     - JavaScript reduce function source.
    # query      - Mongo query limiting the input documents.
    def build(collection, map, reduce, query = {})
      return self.database[collection] unless requires_mr_update(collection)

      # Reuse the existing status record when there is one. collection_name is
      # declared unique, so saving a second record for the same collection
      # fails validation and the existing record would stay dirty, forcing a
      # rebuild on every call.
      mr_status = MrStatus.where(:collection_name => collection).first || MrStatus.new
      mr_status.collection_name = collection
      mr_status.status = false
      mr_status.base_class = self.to_s
      mr_status.save

      opts = {:out => {:replace => collection}, :query => query}
      self.collection.map_reduce(map, reduce, opts)
    end

    # Does this particular map reduce require an update?
    # True when no status record exists yet; otherwise the record's dirty flag.
    def requires_mr_update(collection)
      status = MrStatus.where(:collection_name => collection).first
      status.nil? || status.status
    end

    private

    # True when +name+ is a declared key of type Array. Keys that are not
    # declared on the model are treated as scalar instead of raising on nil.
    def mr_array_key?(name)
      declared = keys[name.to_s]
      !declared.nil? && declared.type == Array
    end
  end

  # Marks every cached map/reduce collection built from this document's class
  # as dirty. Defined directly on the module rather than in a nested
  # InstanceMethods module, so it is mixed in by the ordinary include and does
  # not rely on ActiveSupport::Concern's InstanceMethods convention.
  def mr_dirty!
    MrStatus.where({:base_class => self.class.to_s}).all.each do |m|
      m.status = true
      m.save
    end
  end
end
121
+
@@ -0,0 +1,3 @@
1
module Reduceable
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path("../lib/reduceable/version", __FILE__)
3
+
4
Gem::Specification.new do |s|
  s.name        = "reduceable"
  s.version     = Reduceable::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Leonard Garvey']
  s.email       = ['lengarvey@gmail.com']
  s.homepage    = "http://github.com/lengarvey/reduceable"
  s.summary     = "Reduceable makes map reduce in mongo easy"
  s.description = "Adds cached count_by and sum_of map/reduce helpers to MongoMapper documents."

  s.required_rubygems_version = ">= 1.3.6"

  # lib/reduceable.rb requires these at load time, so they must be runtime
  # dependencies of the gem; the constraints mirror the Gemfile.
  s.add_dependency "mongo_mapper", "~> 0.9.0"
  s.add_dependency "activesupport", "~> 3.0.7"

  s.add_development_dependency "bundler", ">= 1.0.0"
  s.add_development_dependency "rspec", ">= 2.3.0"

  # Ask git for the tracked files once and derive both lists from it.
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  # Executables are the tracked files under bin/, listed without the prefix.
  s.executables  = tracked_files.map { |f| f[/^bin\/(.*)/, 1] }.compact
  s.require_path = 'lib'
end
@@ -0,0 +1,44 @@
1
+ require 'mongo_mapper'
2
+ require './lib/reduceable'
3
+
4
# Shared spec support: database setup, seed data and the expected aggregates.
module Helpers
  MongoMapper.database = 'reduceable_dev'

  # Drops every collection in the spec database, best effort.
  def clear_database
    MongoMapper.database.collections.each do |collection|
      begin
        collection.drop
      rescue StandardError
        # Deliberately ignored so one undroppable collection does not abort
        # the cleanup. Only StandardError is rescued, so interrupts and exit
        # requests still propagate.
      end
    end
  end

  # Expected count_by(:tags) result for the data inserted by load_data.
  def count_answers
    {'book'=>5.0, 'fiction'=>2.0, 'music'=>4.0, 'fantasy'=>2.0, 'non-fiction'=>1.0, 'rock'=>1.0, 'pop'=>1.0, 'classical'=>1.0, 'alternative'=>1.0}
  end

  # Expected sum_of(:sale_amount, :tags) result for the data inserted by load_data.
  def sum_answers
    {'book'=>200.0, 'fiction'=>80.0, 'music'=>82.0, 'fantasy'=>80.0, 'non-fiction'=>40.0, 'rock'=>20.5, 'pop'=>20.5, 'classical'=>20.5, 'alternative'=>20.5}
  end

  # Inserts the nine sample sales the expected answers are based on.
  def load_data
    # Each row is [tags, sale_amount].
    [
      [['book', 'fiction'],      40],
      [['music', 'rock'],        20.5],
      [['music', 'pop'],         20.5],
      [['music', 'classical'],   20.5],
      [['music', 'alternative'], 20.5],
      [['book', 'fiction'],      40],
      [['book', 'fantasy'],      40],
      [['book', 'fantasy'],      40],
      [['book', 'non-fiction'],  40]
    ].each do |tags, amount|
      Sale.create(:date => Date.today, :tags => tags, :sale_amount => amount)
    end
  end

  # Document under test: a single sale with a date, an amount and tags.
  class Sale
    include MongoMapper::Document
    include Reduceable

    key :date, String # or Time YYYYMMDD format
    key :sale_amount, Float
    key :tags, Array  # a list of tags you might want to query on
                      # eg: you can query on 'book' to find out
                      # how many book sales you have per day
  end
end
@@ -0,0 +1,41 @@
1
+ $: << File.dirname(__FILE__)
2
+ require 'helpers'
3
+
4
RSpec.configure do |c|
  c.include Helpers
end

describe "Reduceable" do
  # Every example starts from a freshly seeded database.
  before(:each) do
    clear_database
    load_data
  end

  # Sale is referenced as Helpers::Sale so that the constant lookup does not
  # depend on how the example blocks are evaluated.
  it "should count_by" do
    Helpers::Sale.respond_to?(:count_by).should eql true
  end

  it "should sum_of" do
    Helpers::Sale.respond_to?(:sum_of).should eql true
  end

  it "returns a cursor when asked to count" do
    values = Helpers::Sale.count_by(:tags)
    values.class.should eql(Mongo::Cursor)
  end

  # The whole result set is compared with the expected table, so a missing or
  # empty result fails instead of passing without checking anything.
  it "should be able to count" do
    values = Helpers::Sale.count_by(:tags).to_a
    totals = Hash[values.map { |row| [row['_id'], row['value']] }]
    totals.should == count_answers
  end

  it "should be able to add" do
    values = Helpers::Sale.sum_of(:sale_amount, :tags).to_a
    totals = Hash[values.map { |row| [row['_id'], row['value']] }]
    totals.should == sum_answers
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reduceable
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Leonard Garvey
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-05-16 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: &2156485540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.0.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2156485540
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2156484980 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 2.3.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2156484980
36
+ description: ''
37
+ email:
38
+ - lengarvey@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - .gitignore
44
+ - Gemfile
45
+ - README.markdown
46
+ - Rakefile
47
+ - example.rb
48
+ - lib/reduceable.rb
49
+ - lib/reduceable/version.rb
50
+ - reduceable.gemspec
51
+ - spec/helpers.rb
52
+ - spec/reduceable_spec.rb
53
+ homepage: http://github.com/lengarvey/reduceable
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: 1.3.6
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.7.2
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Reduceable makes map reduce in mongo easy
77
+ test_files: []