reduceable 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
4
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
# One explicit HTTPS source. The previous `source :rubygems` and
# `source :gemcutter` lines were symbol shorthands that Bundler has deprecated;
# both pointed at the rubygems.org host over plain HTTP, so a single URL
# replaces them.
source "https://rubygems.org"

gem "mongo_mapper", "~>0.9.0"
gem "bson_ext"
gem "activesupport", "~>3.0.7"

# Specify your gem's dependencies in the .gemspec file
gemspec
@@ -0,0 +1,99 @@
1
+ Reduceable
2
+ ==========
3
+
4
+ This is a module for MongoMapper which provides an easy way to add in some simple
5
+ map/reduce functions to your data. If you have time series data and you want to show
6
+ some sort of counter per date or time, then this should do it.
7
+
8
+ Concept
9
+ -------
10
+ You have a bunch of objects in your MongoDB. You need to get some basic information
11
+ about them such as:
12
+ Simple aggregation of documents per key,
13
+ Finding an average of a value,
14
+ Counting the number of documents that contain a key.
15
+
16
+ You've probably read you can do this sort of stuff with MongoDB's map/reduce
17
+ functionality, maybe you already know exactly how that works or maybe you don't
18
+ really have a clue. Every guide I've seen for MongoMapper recommends you execute
19
+ the map/reduce calculation every single time it's accessed, and they all demand
20
+ that you write your own map and reduce functions.
21
+
22
+ Here are some use cases
23
+
24
+ ```ruby
25
+ # Count how many times each tag is used
26
+ Model.count_by(:tag, query = {})
27
+ # Sum all the weights of the different types of wrestlers
28
+ Model.sum_of(:weight, :wrestler_type, query = {})
29
+ ```
30
+
31
+ Coming Soon
32
+ ----------
33
+ + mongoid support
34
+ + Sum by composite index
35
+ + Averages
36
+ + More Unit Tests :(
37
+
38
+ Installation
39
+ ------------
40
+ ```
41
+ gem install reduceable
42
+ # or
43
+ sudo gem install reduceable
44
+ ```
45
+
46
+ Usage
47
+ -----
48
+ ```ruby
49
+ require 'mongo_mapper'
50
+ require 'reduceable'
51
+
52
+ MongoMapper.database = 'my_database_name'
53
+
54
+ class BlogPost
55
+ include MongoMapper::Document
56
+ include Reduceable
57
+
58
+ key :article_body, String
59
+ key :categories, Array
60
+ key :time_posted, Time
61
+ key :article_length, Integer
62
+ end
63
+
64
+ # Insert some data
65
+
66
+ BlogPost.count_by(:categories).to_a.each do |x|
67
+ puts "You have posted #{x['value']} posts from category #{x['_id']}"
68
+ end
69
+ BlogPost.sum_of(:article_length, :categories).to_a.each do |x|
70
+ puts "You have written #{x['value']} characters in category #{x['_id']}"
71
+ end
72
+ ```
73
+
74
+ See example.rb
75
+
76
+ ```ruby
77
+ # require the example model
78
+ require './example.rb' #=> true
79
+ # setup some base data
80
+ setup #=> #<Test _id: BSON: ......
81
+ #
82
+ # Calculate how many times each tag is used
83
+ # You will use a similar map/reduce for a tag cloud
84
+ Test.count_by(:tags).to_a
85
+ #=> [{"_id"=>"alternative", "value"=>1.0}, {"_id"=>"book", "value"=>5.0}, {"_id"=>"classical", "value"=>1.0}, {"_id"=>"fantasy", "value"=>2.0}, {"_id"=>"fiction", "value"=>2.0}, {"_id"=>"music", "value"=>4.0}, {"_id"=>"non-fiction", "value"=>1.0}, {"_id"=>"pop", "value"=>1.0}, {"_id"=>"rock", "value"=>1.0}]
86
+
87
+ # Sum up the sale_amounts per tag
88
+ Test.sum_of(:sale_amount, :tags).to_a
89
+
90
+ # Sum up the sale_amounts per tag where tags contains 'book'
91
+ Test.sum_of(:sale_amount, :tags, {:tags => 'book'}).to_a
92
+ # you can optionally pass in a mongo query that limits the initial dataset being
93
+ # fed to the map function.
94
+ ```
95
+
96
+ For such a small collection the speed benefits aren't present, but once you get to
97
+ several hundred thousand records, recreating the map_reduce collection on every call
98
+ really slows things down. Reduceable solves that problem.
99
+
@@ -0,0 +1,15 @@
1
require 'bundler'
Bundler::GemHelper.install_tasks

require "rspec/core/rake_task"

# `rake core` runs every *_spec.rb file under spec/.
# Only `--format documentation` is passed: the old `-fs` short flag selected
# the same formatter a second time and is not recognised by newer RSpec
# releases (NOTE(review): confirm against the RSpec version you run).
RSpec::Core::RakeTask.new(:core) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rspec_opts = %w(--color --format documentation)
end

# Pull in any extra task definitions kept under tasks/.
Dir["tasks/*.rake"].each { |rake_file| load rake_file }

task :default => [:core]
@@ -0,0 +1,29 @@
1
+ require 'mongo_mapper'
2
+ require 'reduceable'
3
+
4
+ MongoMapper.database = 'mr_test'
5
+
6
# Resets the Test collection and seeds it with nine sample sales.
# Returns the last document created, exactly as the final create call does.
def setup
  Test.collection.remove

  # [tags, sale_amount] pairs, inserted in this order.
  sample_sales = [
    [['book', 'fiction'], 40],
    [['music', 'rock'], 20.5],
    [['music', 'pop'], 20.5],
    [['music', 'classical'], 20.5],
    [['music', 'alternative'], 20.5],
    [['book', 'fiction'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'non-fiction'], 40]
  ]

  created = sample_sales.map do |tags, amount|
    Test.create(:date => Date.today, :tags => tags, :sale_amount => amount)
  end
  created.last
end
19
+
20
# Example model: one document per sale, made reduceable so that
# Test.count_by and Test.sum_of are available.
class Test
  include MongoMapper::Document
  include Reduceable

  # Sale date, stored as a String (a Time in YYYYMMDD format would also do).
  key :date, String
  # Value of the sale.
  key :sale_amount, Float
  # Tags you might want to query on, e.g. query on 'book' to find out
  # how many book sales you have per day.
  key :tags, Array
end
@@ -0,0 +1,121 @@
1
+ require 'active_support'
2
+ require 'mongo_mapper'
3
+ require 'base64'
4
+
5
# Bookkeeping record for one cached map/reduce output collection.
class MrStatus
  include MongoMapper::Document
  safe

  # Name of the map/reduce output collection to query.
  key :collection_name, String, :unique => true
  # Name of the base class the map/reduce results are calculated from.
  key :base_class, String
  # Staleness flag: true means dirty, false means clean.
  key :status, Boolean
end
16
+
17
# Mixin for MongoMapper documents that adds two map/reduce helpers,
# +count_by+ and +sum_of+, whose output collections are reused between calls.
#
# Each output collection is tracked by an MrStatus record. Saving any document
# of the including class marks that class's records dirty; the next helper call
# for a dirty (or unknown) collection re-runs the map/reduce, otherwise the
# existing output collection is queried directly.
module Reduceable
  extend ActiveSupport::Concern

  included do
    # Every save may change the aggregates, so flag the cached results stale.
    after_save :mr_dirty!
  end

  module ClassMethods
    # Sums +property+ grouped by +index+.
    #
    # property - name of the key whose values are added up
    # index    - name of the key to group by; when it is declared as an Array
    #            key, each element of the array becomes its own group
    # query    - optional Mongo query limiting the documents fed to map
    #
    # Returns the result of calling +find+ on the output collection.
    def sum_of(property, index, query = {})
      collection = mr_collection_name("sum_of_#{property}_by_#{index}", query)
      build(collection, sum_map(property, index), sum_reduce, query).find
    end

    # Builds the JavaScript map function used by sum_of.
    def sum_map(property, index)
      if mr_array_key?(index)
        "function(){var amount = this.#{property};this.#{index}.forEach(function(value){emit(value, amount);});}"
      else
        "function(){emit(this.#{index}, this.#{property});}"
      end
    end

    # JavaScript reduce function that adds up every value emitted for a key.
    def sum_reduce
      <<-REDUCE
        function(key, values) {
          var total = 0;
          for (var i=0; i<values.length; i++){
            total += values[i];
          }
          return total;
        }
      REDUCE
    end

    # Counts documents grouped by +index+.
    #
    # index - name of the key to group by; Array keys are counted per element
    # query - optional Mongo query limiting the documents fed to map
    #
    # Returns the result of calling +find+ on the output collection.
    def count_by(index, query = {})
      collection = mr_collection_name("count_by_#{index}", query)
      build(collection, count_map(index), count_reduce, query).find
    end

    # Builds the JavaScript map function used by count_by.
    # Not sure how to handle hashes yet.
    def count_map(key)
      if mr_array_key?(key)
        "function(){this.#{key}.forEach(function(value){emit(value, 1);});}"
      else
        "function(){emit(this.#{key}, 1);}"
      end
    end

    # Counting emits 1 per document, so the reduce step is the same summation
    # used by sum_of.
    def count_reduce
      sum_reduce
    end

    # Derives the output collection name from the class name, the action and
    # the query, so every distinct query gets its own cached collection.
    #
    # Top-level query keys are sorted before encoding so the same conditions
    # written in a different order map to the same collection. Nested hashes
    # are still order-sensitive.
    # TODO: come up with a better way of getting the collection name
    def mr_collection_name(action, query = {})
      canonical = query.is_a?(Hash) ? Hash[query.sort_by { |field, _| field.to_s }] : query
      encoded_query = Base64.urlsafe_encode64(canonical.to_s)
      (to_s.downcase + "#{action}_mr_" + encoded_query).tr('=:', '_')
    end

    # Returns the up-to-date output collection for this map/reduce, running
    # the map/reduce first when the cached collection is missing or dirty.
    def build(collection, map, reduce, query = {})
      return database[collection] unless requires_mr_update(collection)

      # Reuse the existing status record: collection_name is declared unique,
      # so saving a second record for the same name would not replace the
      # dirty one and the cache would never become clean again.
      mr_status = mr_status_for(collection) || MrStatus.new(:collection_name => collection)
      mr_status.base_class = to_s
      # Marked clean before the run so a save that happens while map/reduce is
      # executing flags the result dirty again.
      mr_status.status = false
      mr_status.save

      begin
        self.collection.map_reduce(map, reduce, :out => {:replace => collection}, :query => query)
      rescue StandardError
        # The output was not rebuilt, so make sure the next call retries.
        mr_status.status = true
        mr_status.save
        raise
      end
    end

    # Does this particular map reduce require an update?
    # True when no status record exists yet or the record is flagged dirty.
    def requires_mr_update(collection)
      record = mr_status_for(collection)
      record.nil? || record.status
    end

    private

    # First MrStatus record tracking +collection+, or nil when none exists.
    def mr_status_for(collection)
      MrStatus.where(:collection_name => collection).first
    end

    # True when +name+ is a declared key of type Array. Undeclared keys are
    # treated as scalars instead of raising NoMethodError on nil.
    def mr_array_key?(name)
      definition = keys[name.to_s]
      !definition.nil? && definition.type == Array
    end
  end

  # Flags every cached map/reduce result computed from this document's class
  # as dirty. Defined directly on the module (not in a nested InstanceMethods
  # module) so it is mixed in by a plain include whatever the ActiveSupport
  # version.
  def mr_dirty!
    MrStatus.where(:base_class => self.class.to_s).all.each do |cached|
      cached.status = true
      cached.save
    end
  end
end
121
+
@@ -0,0 +1,3 @@
1
# Namespace of the reduceable gem; this file only carries the release number.
module Reduceable
  # Version string read by the gemspec.
  VERSION = '0.1.0'
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path("../lib/reduceable/version", __FILE__)
3
+
4
Gem::Specification.new do |s|
  s.name        = "reduceable"
  s.version     = Reduceable::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Leonard Garvey']
  s.email       = ['lengarvey@gmail.com']
  s.homepage    = "http://github.com/lengarvey/reduceable"
  s.summary     = "Reduceable makes map reduce in mongo easy"
  s.description = "A MongoMapper mixin that adds simple, cached map/reduce helpers (count_by and sum_of) to your documents."

  s.required_rubygems_version = ">= 1.3.6"

  # lib/reduceable.rb requires both of these at load time, so they must be
  # installed together with the gem. Constraints mirror the Gemfile.
  s.add_dependency "mongo_mapper", "~> 0.9.0"
  s.add_dependency "activesupport", "~> 3.0.7"

  s.add_development_dependency "bundler", ">= 1.0.0"
  s.add_development_dependency "rspec", ">= 2.3.0"

  # Ask git for the tracked files once and derive both lists from it.
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  # Executables are the tracked files directly under bin/, without the prefix.
  s.executables  = tracked_files.map { |path| path[/^bin\/(.*)/, 1] }.compact
  s.require_path = 'lib'
end
@@ -0,0 +1,44 @@
1
+ require 'mongo_mapper'
2
+ require './lib/reduceable'
3
+
4
# Shared fixtures and expectations for the Reduceable specs.
module Helpers
  MongoMapper.database = 'reduceable_dev'

  # [tags, sale_amount] for every fixture sale, in insertion order.
  SALE_FIXTURES = [
    [['book', 'fiction'], 40],
    [['music', 'rock'], 20.5],
    [['music', 'pop'], 20.5],
    [['music', 'classical'], 20.5],
    [['music', 'alternative'], 20.5],
    [['book', 'fiction'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'non-fiction'], 40]
  ]

  # Drops every collection in the test database, best effort.
  def clear_database
    MongoMapper.database.collections.each do |collection|
      begin
        collection.drop
      rescue StandardError
        # Deliberately ignored so one failed drop does not stop the cleanup
        # (presumably some collections cannot be dropped; verify).
        # StandardError rather than Exception, so interrupts and exit
        # requests are no longer swallowed.
      end
    end
  end

  # Expected number of fixture sales per tag.
  def count_answers
    {'book'=>5.0, 'fiction'=>2.0, 'music'=>4.0, 'fantasy'=>2.0, 'non-fiction'=>1.0, 'rock'=>1.0, 'pop'=>1.0, 'classical'=>1.0, 'alternative'=>1.0}
  end

  # Expected total sale_amount of the fixture sales per tag.
  def sum_answers
    {'book'=>200.0, 'fiction'=>80.0, 'music'=>82.0, 'fantasy'=>80.0, 'non-fiction'=>40.0, 'rock'=>20.5, 'pop'=>20.5, 'classical'=>20.5, 'alternative'=>20.5}
  end

  # Inserts the fixture sales that count_answers and sum_answers describe.
  def load_data
    SALE_FIXTURES.each do |tags, amount|
      Sale.create(:date => Date.today, :tags => tags, :sale_amount => amount)
    end
  end

  # Model under test: a sale with a date, an amount and a list of tags.
  class Sale
    include MongoMapper::Document
    include Reduceable

    key :date, String # or Time YYYYMMDD format
    key :sale_amount, Float
    key :tags, Array # a list of tags you might want to query on
    # eg: you can query on 'book' to find out
    # how many book sales you have per day
  end
end
@@ -0,0 +1,41 @@
1
$: << File.dirname(__FILE__)
require 'helpers'

RSpec.configure do |c|
  c.include Helpers
end

describe "Reduceable" do
  before(:each) do
    clear_database
    load_data
  end

  # Turns map/reduce rows ({'_id' => key, 'value' => number}) into a
  # {key => number} hash so a whole result set can be compared at once.
  def totals_by_id(rows)
    Hash[rows.map { |row| [row['_id'], row['value']] }]
  end

  it "should count_by" do
    Sale.should respond_to(:count_by)
  end

  it "should sum_of" do
    Sale.should respond_to(:sum_of)
  end

  it "returns a cursor when asked to count" do
    Sale.count_by(:tags).should be_an_instance_of(Mongo::Cursor)
  end

  # Comparing the complete hash (instead of checking each returned row) also
  # fails when the result is empty or a tag is missing, and the failure
  # message shows expected and actual the right way round.
  it "should be able to count" do
    totals_by_id(Sale.count_by(:tags).to_a).should == count_answers
  end

  it "should be able to add" do
    totals_by_id(Sale.sum_of(:sale_amount, :tags).to_a).should == sum_answers
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reduceable
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Leonard Garvey
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-05-16 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: &2156485540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.0.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2156485540
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2156484980 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 2.3.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2156484980
36
+ description: ''
37
+ email:
38
+ - lengarvey@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - .gitignore
44
+ - Gemfile
45
+ - README.markdown
46
+ - Rakefile
47
+ - example.rb
48
+ - lib/reduceable.rb
49
+ - lib/reduceable/version.rb
50
+ - reduceable.gemspec
51
+ - spec/helpers.rb
52
+ - spec/reduceable_spec.rb
53
+ homepage: http://github.com/lengarvey/reduceable
54
+ licenses: []
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: 1.3.6
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 1.7.2
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Reduceable makes map reduce in mongo easy
77
+ test_files: []