reduceable 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +9 -0
- data/README.markdown +99 -0
- data/Rakefile +15 -0
- data/example.rb +29 -0
- data/lib/reduceable.rb +121 -0
- data/lib/reduceable/version.rb +3 -0
- data/reduceable.gemspec +22 -0
- data/spec/helpers.rb +44 -0
- data/spec/reduceable_spec.rb +41 -0
- metadata +77 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
Reduceable
|
2
|
+
=========
|
3
|
+
|
4
|
+
This is a module for MongoMapper which provides an easy way to add in some simple
|
5
|
+
map/reduce functions to your data. If you have time series data and you want to show
|
6
|
+
some sort of counter per date or time, then this should do it.
|
7
|
+
|
8
|
+
Concept
|
9
|
+
-------
|
10
|
+
You have a bunch of objects in your MongoDB. You need to get some basic information
|
11
|
+
about them such as:
|
12
|
+
Simple aggregation of documents per key,
|
13
|
+
Finding an average of a value,
|
14
|
+
Counting the number of documents that contain a key.
|
15
|
+
|
16
|
+
You've probably read you can do this sort of stuff with MongoDB's map/reduce
|
17
|
+
functionality, maybe you already know exactly how that works or maybe you don't
|
18
|
+
really have a clue. Every guide I've seen for MongoMapper recommends you execute
|
19
|
+
the map/reduce calculation every single time it's accessed, and they all demand
|
20
|
+
that you write your own map and reduce functions.
|
21
|
+
|
22
|
+
Here are some use cases
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
# Count how many times each tag is used
|
26
|
+
Model.count_by(:tag, query = {})
|
27
|
+
# Sum all the weights of the different types of wrestlers
|
28
|
+
Model.sum_of(:weight, :wrestler_type, query = {})
|
29
|
+
```
|
30
|
+
|
31
|
+
Coming Soon
|
32
|
+
----------
|
33
|
+
+ mongoid support
|
34
|
+
+ Sum by composite index
|
35
|
+
+ Averages
|
36
|
+
+ More Unit Tests :(
|
37
|
+
|
38
|
+
Installation
|
39
|
+
------------
|
40
|
+
```
|
41
|
+
gem install reduceable
|
42
|
+
# or
|
43
|
+
sudo gem install reduceable
|
44
|
+
```
|
45
|
+
|
46
|
+
Usage
|
47
|
+
-----
|
48
|
+
```ruby
|
49
|
+
require 'mongo_mapper'
|
50
|
+
require 'reduceable'
|
51
|
+
|
52
|
+
MongoMapper.database = 'my_database_name'
|
53
|
+
|
54
|
+
class BlogPost
|
55
|
+
include MongoMapper::Document
|
56
|
+
include Reduceable
|
57
|
+
|
58
|
+
key :article_body, String
|
59
|
+
key :categories, Array
|
60
|
+
key :time_posted, Time
|
61
|
+
key :article_length, Integer
|
62
|
+
end
|
63
|
+
|
64
|
+
# Insert some data
|
65
|
+
|
66
|
+
BlogPost.count_by(:categories).to_a.each do |x|
|
67
|
+
puts "You have posted #{x['value']} posts from category #{x['_id']}"
|
68
|
+
end
|
69
|
+
BlogPost.sum_of(:article_length, :categories).to_a.each do |x|
|
70
|
+
puts "You have written #{x['value']} characters in category #{x['_id']}"
|
71
|
+
end
|
72
|
+
```
|
73
|
+
|
74
|
+
See example.rb
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
# require the example model
|
78
|
+
require './example.rb' #=> true
|
79
|
+
# setup some base data
|
80
|
+
setup #=> #<Test _id: BSON: ......
|
81
|
+
#
|
82
|
+
# Calculate how many times each tag is used
|
83
|
+
# You will use a similar map/reduce for a tag cloud
|
84
|
+
Test.count_by(:tags).to_a
|
85
|
+
#=> [{"_id"=>"alternative", "value"=>1.0}, {"_id"=>"book", "value"=>5.0}, {"_id"=>"classical", "value"=>1.0}, {"_id"=>"fantasy", "value"=>2.0}, {"_id"=>"fiction", "value"=>2.0}, {"_id"=>"music", "value"=>4.0}, {"_id"=>"non-fiction", "value"=>1.0}, {"_id"=>"pop", "value"=>1.0}, {"_id"=>"rock", "value"=>1.0}]
|
86
|
+
|
87
|
+
# Sum up the sale_amounts per tag
|
88
|
+
Test.sum_of(:sale_amount, :tags).to_a
|
89
|
+
|
90
|
+
# Sum up the sale_amounts per tag where tags contains 'book'
|
91
|
+
Test.sum_of(:sale_amount, :tags, {:tags => 'book'}).to_a
|
92
|
+
# you can optionally pass in a mongo query that limits the initial dataset being
|
93
|
+
# fed to the map function.
|
94
|
+
```
|
95
|
+
|
96
|
+
For such a small collection the speed benefits aren't present, but once you get to
|
97
|
+
several hundred thousand records, recreating the map_reduce collection on every call
|
98
|
+
really slows things down. Reduceable solves that problem.
|
99
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Rake tasks for the reduceable gem: Bundler's gem tasks plus the spec runner.
require 'bundler'
require 'rspec/core/rake_task'

# Registers the gem tasks that Bundler::GemHelper provides.
Bundler::GemHelper.install_tasks

# `rake core` runs every *_spec.rb file found under spec/.
RSpec::Core::RakeTask.new(:core) do |t|
  t.pattern    = 'spec/**/*_spec.rb'
  t.rspec_opts = ['-fs', '--color', '--format', 'documentation']
end

# Load any additional task definitions kept under tasks/.
Dir['tasks/*.rake'].each { |rake_file| load rake_file }

# Running plain `rake` runs the specs.
task :default => [:core]
|
data/example.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'mongo_mapper'
|
2
|
+
require 'reduceable'
|
3
|
+
|
4
|
+
MongoMapper.database = 'mr_test'
|
5
|
+
|
6
|
+
# Empties the Test collection and re-seeds it with nine sample sales, all
# dated today: five tagged 'book' at 40 and four tagged 'music' at 20.5.
# Returns the last record created.
def setup
  Test.collection.remove

  # [tags, sale_amount] pairs, in insertion order.
  samples = [
    [['book', 'fiction'],      40],
    [['music', 'rock'],        20.5],
    [['music', 'pop'],         20.5],
    [['music', 'classical'],   20.5],
    [['music', 'alternative'], 20.5],
    [['book', 'fiction'],      40],
    [['book', 'fantasy'],      40],
    [['book', 'fantasy'],      40],
    [['book', 'non-fiction'],  40]
  ]

  created = samples.map do |tags, amount|
    Test.create(:date => Date.today, :tags => tags, :sale_amount => amount)
  end
  created.last
end
|
19
|
+
|
20
|
+
# Example model: one sale with a date, an amount and a list of tags.
# Including Reduceable gives it the count_by / sum_of class methods.
class Test
  include MongoMapper::Document
  include Reduceable

  # Stored as a String; a Time in YYYYMMDD format would also work.
  key :date, String
  key :sale_amount, Float
  # Tags you might want to query on, e.g. query on 'book' to find out
  # how many book sales you have per day.
  key :tags, Array
end
|
data/lib/reduceable.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'mongo_mapper'
|
3
|
+
require 'base64'
|
4
|
+
|
5
|
+
# Bookkeeping record for one cached map/reduce output collection: which
# collection it is, which model it was calculated from, and whether it is
# out of date.
class MrStatus
  include MongoMapper::Document
  # NOTE(review): presumably turns on safe-mode writes for this model -- confirm
  # against the MongoMapper version in use.
  safe

  # Name of the map/reduce output collection to query.
  key :collection_name, String, :unique => true
  # Name of the model class the map/reduce results are calculated from.
  key :base_class, String
  # Dirty flag: true means the cached results are dirty, false means clean.
  key :status, Boolean
end
|
16
|
+
|
17
|
+
# Reduceable adds cached map/reduce helpers to a MongoMapper document.
#
# Including the module gives the model two class-level queries:
#
#   Model.count_by(:tags)               # documents per tag
#   Model.sum_of(:sale_amount, :tags)   # total sale_amount per tag
#
# Each query writes its result into its own output collection and records an
# MrStatus row for it. The output collection is reused until a document of
# the model is saved or destroyed, which flags every cached result of that
# model as dirty so it is rebuilt on the next call.
module Reduceable
  extend ActiveSupport::Concern

  included do
    after_save :mr_dirty!
    # Removing a document changes counts and sums just like saving one does.
    after_destroy :mr_dirty!
  end

  module ClassMethods
    # Sums +property+ for each distinct value of +index+.
    #
    # property - name of the numeric key to add up.
    # index    - name of the key to group by; an Array key contributes once
    #            per element.
    # query    - optional Mongo query restricting the documents fed to map.
    #
    # Returns a cursor over documents of the form {'_id' => index value,
    # 'value' => total}.
    def sum_of(property, index, query={})
      collection = mr_collection_name("sum_of_#{property}_by_#{index}", query)
      build(collection, sum_map(property, index), sum_reduce, query).find
    end

    # JavaScript map function for sum_of: emits (index value, property value).
    # For an Array index the property value is emitted once per element; a
    # document without the array field emits nothing.
    def sum_map(property, index)
      index = index.to_s
      if mr_array_key?(index)
        "function(){var amount = this.#{property};(this.#{index} || []).forEach(function(value){emit(value, amount);});}"
      else
        "function(){emit(this.#{index}, this.#{property});}"
      end
    end

    # JavaScript reduce function that adds up every value emitted for a key.
    def sum_reduce
      <<-REDUCE
        function(key, values) {
          var total = 0;
          for (var i=0; i<values.length; i++){
            total += values[i];
          }
          return total;
        }
      REDUCE
    end

    # Counts the documents for each distinct value of +index+.
    #
    # index - name of the key to group by; an Array key is counted once per
    #         element.
    # query - optional Mongo query restricting the documents fed to map.
    #
    # Returns a cursor over documents of the form {'_id' => index value,
    # 'value' => count}.
    def count_by(index, query={})
      collection = mr_collection_name("count_by_#{index}", query)
      build(collection, count_map(index), count_reduce, query).find
    end

    # JavaScript map function for count_by: emits (index value, 1).
    # Not sure how to handle hashes yet.
    def count_map(key)
      key = key.to_s
      if mr_array_key?(key)
        "function(){(this.#{key} || []).forEach(function(value){emit(value, 1);});}"
      else
        "function(){emit(this.#{key}, 1);}"
      end
    end

    # JavaScript reduce function for count_by. Counting is summing the 1s
    # emitted by count_map, so the sum reducer is reused.
    def count_reduce
      sum_reduce
    end

    # Builds the name of the output collection for +action+ and +query+.
    #
    # We need a unique collection name based off the query. The query is
    # encoded exactly as given, so the same conditions written in a different
    # order produce a different name and miss the cached collection.
    # TODO: come up with a better way of getting the collection name
    def mr_collection_name(action, query = {})
      encoded_query = Base64.urlsafe_encode64(query.to_s)
      "#{to_s.downcase}#{action}_mr_#{encoded_query}".tr('=:', '_')
    end

    # Returns the output collection for a map/reduce, running the map/reduce
    # first when there is no clean cached result.
    #
    # collection - name of the output collection.
    # map        - JavaScript map function source.
    # reduce     - JavaScript reduce function source.
    # query      - Mongo query restricting the input documents.
    def build(collection, map, reduce, query = {})
      return self.database[collection] unless requires_mr_update(collection)

      # Reuse the existing status record. Creating a second record with the
      # same collection_name fails the :unique validation, so the dirty flag
      # would never be cleared and every call would rebuild the collection.
      mr_status = MrStatus.where(:collection_name => collection).first || MrStatus.new
      mr_status.collection_name = collection
      mr_status.base_class = self.to_s
      # Marked clean before the map/reduce runs, so a save that arrives while
      # it is running flags the result dirty again.
      mr_status.status = false
      mr_status.save

      opts = {:out => {:replace => collection}, :query => query}
      self.collection.map_reduce(map, reduce, opts)
    end

    # Does this particular map reduce require an update?
    # True when no status record exists yet; otherwise the record's dirty flag.
    def requires_mr_update(collection)
      mr_status = MrStatus.where(:collection_name => collection).first
      mr_status.nil? || mr_status.status
    end

    private

    # True when +name+ is a declared key of type Array. Undeclared keys are
    # treated as scalars instead of raising on a missing key definition.
    def mr_array_key?(name)
      definition = keys[name]
      !definition.nil? && definition.type == Array
    end
  end

  # Flags every cached map/reduce result calculated from this document's
  # class as dirty, so the next query rebuilds it.
  #
  # Defined directly on the module rather than in a nested InstanceMethods
  # module, because newer ActiveSupport releases no longer include
  # InstanceMethods automatically.
  def mr_dirty!
    MrStatus.where(:base_class => self.class.to_s).all.each do |mr_status|
      mr_status.status = true
      mr_status.save
    end
  end
end
|
121
|
+
|
data/reduceable.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path("../lib/reduceable/version", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for reduceable.
Gem::Specification.new do |s|
  s.name        = "reduceable"
  s.version     = Reduceable::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Leonard Garvey']
  s.email       = ['lengarvey@gmail.com']
  s.homepage    = "http://github.com/lengarvey/reduceable"
  s.summary     = "Reduceable makes map reduce in mongo easy"
  s.description = "A MongoMapper module that adds cached count_by and sum_of map/reduce helpers to your documents."

  s.required_rubygems_version = ">= 1.3.6"

  # lib/reduceable.rb requires both of these at load time, so they must be
  # installed together with the gem.
  s.add_dependency "mongo_mapper"
  s.add_dependency "activesupport"

  s.add_development_dependency "bundler", ">= 1.0.0"
  s.add_development_dependency "rspec", ">= 2.3.0"

  # Ask git for the tracked files once and reuse the list.
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  # Anything tracked under bin/ is an executable; the bin/ prefix is dropped.
  s.executables  = tracked_files.map { |f| f[/^bin\/(.*)/, 1] }.compact
  s.require_path = 'lib'
end
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'mongo_mapper'
|
2
|
+
require './lib/reduceable'
|
3
|
+
|
4
|
+
# Spec support: database selection, fixture data, the expected results for
# that data, and the Sale model the specs run against.
module Helpers
  # All specs run against this database.
  MongoMapper.database = 'reduceable_dev'

  # Drops every collection in the spec database, ignoring collections that
  # refuse to be dropped.
  def clear_database
    MongoMapper.database.collections.each do |collection|
      begin
        collection.drop
      rescue StandardError
        # Best effort: a collection that cannot be dropped is skipped
        # (presumably system collections -- confirm). StandardError rather
        # than Exception, so interrupts and exits still propagate.
      end
    end
  end

  # Expected count_by(:tags) result for the data created by load_data.
  def count_answers
    {'book'=>5.0, 'fiction'=>2.0, 'music'=>4.0, 'fantasy'=>2.0, 'non-fiction'=>1.0, 'rock'=>1.0, 'pop'=>1.0, 'classical'=>1.0, 'alternative'=>1.0}
  end

  # Expected sum_of(:sale_amount, :tags) result for the data created by load_data.
  def sum_answers
    {'book'=>200.0, 'fiction'=>80.0, 'music'=>82.0, 'fantasy'=>80.0, 'non-fiction'=>40.0, 'rock'=>20.5, 'pop'=>20.5, 'classical'=>20.5, 'alternative'=>20.5}
  end

  # Creates nine sample sales, all dated today: five tagged 'book' at 40 and
  # four tagged 'music' at 20.5. Returns the last record created.
  def load_data
    # [tags, sale_amount] pairs, in insertion order.
    samples = [
      [['book', 'fiction'],      40],
      [['music', 'rock'],        20.5],
      [['music', 'pop'],         20.5],
      [['music', 'classical'],   20.5],
      [['music', 'alternative'], 20.5],
      [['book', 'fiction'],      40],
      [['book', 'fantasy'],      40],
      [['book', 'fantasy'],      40],
      [['book', 'non-fiction'],  40]
    ]

    created = samples.map do |tags, amount|
      Sale.create(:date => Date.today, :tags => tags, :sale_amount => amount)
    end
    created.last
  end

  # Model used by the specs: one sale with a date, an amount and tags.
  class Sale
    include MongoMapper::Document
    include Reduceable

    key :date, String # or Time YYYYMMDD format
    key :sale_amount, Float
    key :tags, Array # a list of tags you might want to query on
                     # eg: you can query on 'book' to find out
                     # how many book sales you have per day
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
$: << File.dirname(__FILE__)
|
2
|
+
require 'helpers'
|
3
|
+
|
4
|
+
RSpec.configure do |c|
|
5
|
+
c.include Helpers
|
6
|
+
end
|
7
|
+
|
8
|
+
# Specs for the count_by / sum_of helpers, run against the Sale fixture
# data that load_data creates.
describe "Reduceable" do
  # Every example starts from an empty database plus the nine fixture sales.
  before(:each) do
    clear_database
    load_data
  end

  it "should count_by" do
    Sale.respond_to?(:count_by).should eql true
  end

  it "should sum_of" do
    Sale.respond_to?(:sum_of).should eql true
  end

  it "returns a cursor when asked to count" do
    values = Sale.count_by(:tags)
    values.class.should eql(Mongo::Cursor)
  end

  it "should be able to count" do
    values = Sale.count_by(:tags).to_a
    answers = count_answers
    # Without this an empty result would pass, because the loop below would
    # never run.
    values.size.should eql(answers.size), "Expected #{answers.size} tags got #{values.size}"
    values.each do |value|
      id = value['_id']
      expected = answers[id]
      actual = value['value']
      actual.should eql(expected), "Number of #{id}s calculated. Expected #{expected} got #{actual}"
    end
  end

  it "should be able to add" do
    values = Sale.sum_of(:sale_amount,:tags).to_a
    answers = sum_answers
    # Same guard against an empty result as in the count example.
    values.size.should eql(answers.size), "Expected #{answers.size} tags got #{values.size}"
    values.each do |value|
      id = value['_id']
      expected = answers[id]
      actual = value['value']
      actual.should eql(expected), "Sum of sale_amount for #{id} calculated. Expected #{expected} got #{actual}"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: reduceable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Leonard Garvey
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-16 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: &2156485540 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.0.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2156485540
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &2156484980 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.3.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2156484980
|
36
|
+
description: ''
|
37
|
+
email:
|
38
|
+
- lengarvey@gmail.com
|
39
|
+
executables: []
|
40
|
+
extensions: []
|
41
|
+
extra_rdoc_files: []
|
42
|
+
files:
|
43
|
+
- .gitignore
|
44
|
+
- Gemfile
|
45
|
+
- README.markdown
|
46
|
+
- Rakefile
|
47
|
+
- example.rb
|
48
|
+
- lib/reduceable.rb
|
49
|
+
- lib/reduceable/version.rb
|
50
|
+
- reduceable.gemspec
|
51
|
+
- spec/helpers.rb
|
52
|
+
- spec/reduceable_spec.rb
|
53
|
+
homepage: http://github.com/lengarvey/reduceable
|
54
|
+
licenses: []
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 1.3.6
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.7.2
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Reduceable makes map reduce in mongo easy
|
77
|
+
test_files: []
|