reduceable 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +9 -0
- data/README.markdown +99 -0
- data/Rakefile +15 -0
- data/example.rb +29 -0
- data/lib/reduceable.rb +121 -0
- data/lib/reduceable/version.rb +3 -0
- data/reduceable.gemspec +22 -0
- data/spec/helpers.rb +44 -0
- data/spec/reduceable_spec.rb +41 -0
- metadata +77 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
Reduceable
|
2
|
+
=========
|
3
|
+
|
4
|
+
This is a module for MongoMapper which provides an easy way to add in some simple
|
5
|
+
map/reduce functions to your data. If you have time series data and you want to show
|
6
|
+
some sort of counter per date or time, then this should do it.
|
7
|
+
|
8
|
+
Concept
|
9
|
+
-------
|
10
|
+
You have a bunch of objects in your MongoDB. You need to get some basic information
|
11
|
+
about them such as:
|
12
|
+
Simple aggregation of documents per key,
|
13
|
+
Finding an average of a value,
|
14
|
+
Counting the number of documents that contain a key.
|
15
|
+
|
16
|
+
You've probably read you can do this sort of stuff with MongoDB's map/reduce
|
17
|
+
functionality, maybe you already know exactly how that works or maybe you don't
|
18
|
+
really have a clue. Every guide I've seen for MongoMapper recommends you execute
|
19
|
+
the map/reduce calculation every single time it's accessed, and they all demand
|
20
|
+
that you write your own map and reduce functions.
|
21
|
+
|
22
|
+
Here are some use cases
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
# Count how many times each tag is used
|
26
|
+
Model.count_by(:tag, query = {})
|
27
|
+
# Sum all the weights of the different types of wrestlers
|
28
|
+
Model.sum_of(:weight, :wrestler_type, query = {})
|
29
|
+
```
|
30
|
+
|
31
|
+
Coming Soon
|
32
|
+
----------
|
33
|
+
+ mongoid support
|
34
|
+
+ Sum by composite index
|
35
|
+
+ Averages
|
36
|
+
+ More Unit Tests :(
|
37
|
+
|
38
|
+
Installation
|
39
|
+
------------
|
40
|
+
```
|
41
|
+
gem install reduceable
|
42
|
+
# or
|
43
|
+
sudo gem install reduceable
|
44
|
+
```
|
45
|
+
|
46
|
+
Usage
|
47
|
+
-----
|
48
|
+
```ruby
|
49
|
+
require 'mongo_mapper'
|
50
|
+
require 'reduceable'
|
51
|
+
|
52
|
+
MongoMapper.database = 'my_database_name'
|
53
|
+
|
54
|
+
class BlogPost
|
55
|
+
include MongoMapper::Document
|
56
|
+
include Reduceable
|
57
|
+
|
58
|
+
key :article_body, String
|
59
|
+
key :categories, Array
|
60
|
+
key :time_posted, Time
|
61
|
+
key :article_length, Integer
|
62
|
+
end
|
63
|
+
|
64
|
+
# Insert some data
|
65
|
+
|
66
|
+
BlogPost.count_by(:categories).to_a.each do |x|
|
67
|
+
puts "You have posted #{x['value']} posts from category #{x['_id']}"
|
68
|
+
end
|
69
|
+
BlogPost.sum_of(:article_length, :categories).to_a.each do |x|
|
70
|
+
puts "You have written #{x['value']} characters in category #{x['_id']}"
|
71
|
+
end
|
72
|
+
```
|
73
|
+
|
74
|
+
See example.rb
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
# require the example model
|
78
|
+
require './example.rb' #=> true
|
79
|
+
# setup some base data
|
80
|
+
setup #=> #<Test _id: BSON: ......
|
81
|
+
#
|
82
|
+
# Calculate how many times each tag is used
|
83
|
+
# You will use a similar map/reduce for a tag cloud
|
84
|
+
Test.count_by(:tags).to_a
|
85
|
+
#=> [{"_id"=>"alternative", "value"=>1.0}, {"_id"=>"book", "value"=>5.0}, {"_id"=>"classical", "value"=>1.0}, {"_id"=>"fantasy", "value"=>2.0}, {"_id"=>"fiction", "value"=>2.0}, {"_id"=>"music", "value"=>4.0}, {"_id"=>"non-fiction", "value"=>1.0}, {"_id"=>"pop", "value"=>1.0}, {"_id"=>"rock", "value"=>1.0}]
|
86
|
+
|
87
|
+
# Sum up the sale_amounts per tag
|
88
|
+
Test.sum_of(:sale_amount, :tags).to_a
|
89
|
+
|
90
|
+
# Sum up the sale_amounts per tag where tags contains 'book'
|
91
|
+
Test.sum_of(:sale_amount, :tags, {:tags => 'book'}).to_a
|
92
|
+
# you can optionally pass in a mongo query that limits the initial dataset being
|
93
|
+
# fed to the map function.
|
94
|
+
```
|
95
|
+
|
96
|
+
For such a small collection the speed benefits aren't present, but once you get to
|
97
|
+
several hundred thousand records, recreating the map_reduce collection on every call
|
98
|
+
really slows things down. Reduceable solves that problem.
|
99
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Rake tasks for the reduceable gem: Bundler's gem tasks plus a :core spec
# task, which is also the default task.
require 'bundler'
Bundler::GemHelper.install_tasks

require "rspec/core/rake_task"

# Runs every *_spec.rb under spec/ with coloured, documentation-style output.
# The formatter is named once only: the previous options also passed "-fs",
# a second spelling of the same formatter that newer RSpec releases appear
# to no longer accept (NOTE(review): confirm against the RSpec version in use).
RSpec::Core::RakeTask.new(:core) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rspec_opts = %w(--color --format documentation)
end

# Pick up any extra task definitions dropped into tasks/.
Dir["tasks/*.rake"].each { |rake_file| load rake_file }

task :default => [:core]
|
data/example.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'mongo_mapper'
|
2
|
+
require 'reduceable'
|
3
|
+
|
4
|
+
MongoMapper.database = 'mr_test'
|
5
|
+
|
6
|
+
# Empties the Test collection and seeds it with nine sample sales
# (five tagged 'book', four tagged 'music').
#
# Returns the last document created, as the original did.
def setup
  Test.collection.remove

  # [tags, sale_amount] pairs, inserted in this order
  sample_sales = [
    [['book', 'fiction'], 40],
    [['music', 'rock'], 20.5],
    [['music', 'pop'], 20.5],
    [['music', 'classical'], 20.5],
    [['music', 'alternative'], 20.5],
    [['book', 'fiction'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'fantasy'], 40],
    [['book', 'non-fiction'], 40]
  ]

  created = sample_sales.map do |tags, amount|
    Test.create(:date => Date.today, :tags => tags, :sale_amount => amount)
  end
  created.last
end
|
19
|
+
|
20
|
+
# Example MongoMapper document representing a single sale; including
# Reduceable gives it the count_by / sum_of class methods.
class Test
|
21
|
+
include MongoMapper::Document
|
22
|
+
include Reduceable
|
23
|
+
|
24
|
+
key :date, String # day of the sale; could also be a Time (YYYYMMDD format suggested)
|
25
|
+
key :sale_amount, Float # value of the sale, summed by sum_of(:sale_amount, ...)
|
26
|
+
key :tags, Array # a list of tags you might want to query on
|
27
|
+
# e.g. you can query on 'book' to find out
|
28
|
+
# how many book sales you have per day
|
29
|
+
end
|
data/lib/reduceable.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'mongo_mapper'
|
3
|
+
require 'base64'
|
4
|
+
|
5
|
+
# Bookkeeping document for one cached map/reduce output collection: records
# which model class it was built from and whether it must be rebuilt.
class MrStatus
|
6
|
+
include MongoMapper::Document
|
7
|
+
safe
|
8
|
+
|
9
|
+
# name of the map/reduce output collection this record tracks
|
10
|
+
key :collection_name, String, :unique => true
|
11
|
+
# name of the model class the map/reduce results are calculated from
|
12
|
+
key :base_class, String
|
13
|
+
key :status, Boolean # true = dirty (needs rebuilding), false = clean
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
# Mixin for MongoMapper documents that adds two cached map/reduce
# aggregations as class methods:
#
#   Model.count_by(:tags)               # documents per tag
#   Model.sum_of(:sale_amount, :tags)   # sum of sale_amount per tag
#
# Each aggregation is written to its own output collection and tracked by an
# MrStatus record. Saving or destroying any document of the including class
# marks all of that class's cached results dirty, so they are rebuilt the next
# time they are requested.
module Reduceable
  extend ActiveSupport::Concern

  included do
    after_save :mr_dirty!
    # Removing a document changes the aggregates just as saving one does.
    after_destroy :mr_dirty!
  end

  module ClassMethods
    # Sums +property+ grouped by +index+.
    #
    # property - Symbol/String name of the numeric key to add up.
    # index    - Symbol/String name of the key to group by; Array keys
    #            contribute once per element.
    # query    - optional Mongo query Hash restricting the input documents.
    #
    # Returns a cursor over {"_id" => group, "value" => sum} documents.
    def sum_of(property, index, query = {})
      collection = mr_collection_name("sum_of_#{property}_by_#{index}", query)
      build(collection, sum_map(property, index), sum_reduce, query).find
    end

    # JavaScript map function emitting (index value, property value) pairs.
    def sum_map(property, index)
      index = index.to_s
      if mr_array_key?(index)
        "function(){var amount = this.#{property};this.#{index}.forEach(function(value){emit(value, amount);});}"
      else
        "function(){emit(this.#{index}, this.#{property});}"
      end
    end

    # JavaScript reduce function adding up all emitted values for a key.
    def sum_reduce
      <<-REDUCE
        function(key, values) {
          var total = 0;
          for (var i=0; i<values.length; i++){
            total += values[i];
          }
          return total;
        }
      REDUCE
    end

    # Counts documents grouped by +index+.
    #
    # index - Symbol/String name of the key to group by; Array keys
    #         contribute once per element.
    # query - optional Mongo query Hash restricting the input documents.
    #
    # Returns a cursor over {"_id" => group, "value" => count} documents.
    def count_by(index, query = {})
      collection = mr_collection_name("count_by_#{index}", query)
      build(collection, count_map(index), count_reduce, query).find
    end

    # JavaScript map function emitting (key value, 1) pairs.
    # Not sure how to handle hashes yet.
    def count_map(key)
      key = key.to_s
      if mr_array_key?(key)
        "function(){this.#{key}.forEach(function(value){emit(value, 1);});}"
      else
        "function(){emit(this.#{key}, 1);}"
      end
    end

    # Counting is summing the emitted 1s, so the sum reducer is reused.
    def count_reduce
      sum_reduce
    end

    # Builds the output collection name for +action+ restricted by +query+.
    #
    # The name embeds the Base64 of query.to_s, so the same query written
    # with its keys in a different order yields a different name and does not
    # reuse the cached collection.
    # TODO: come up with a better way of getting the collection name
    def mr_collection_name(action, query = {})
      encoded_query = Base64.urlsafe_encode64(query.to_s)
      "#{to_s.downcase}#{action}_mr_#{encoded_query}".tr('=:', '_')
    end

    # Returns the output collection for the given map/reduce, running the
    # map/reduce first when no clean cached result exists.
    #
    # collection - String name of the output collection.
    # map/reduce - JavaScript function source Strings.
    # query      - Mongo query Hash restricting the input documents.
    def build(collection, map, reduce, query = {})
      return database[collection] unless requires_mr_update(collection)

      # Reuse the existing status record when there is one: collection_name is
      # declared unique, so saving a second record for the same collection is
      # expected to be rejected, which would leave the old record dirty forever.
      mr_status = MrStatus.first(:collection_name => collection) ||
                  MrStatus.new(:collection_name => collection)
      mr_status.base_class = to_s
      mr_status.status = false
      # Marked clean before the map/reduce runs so that a document saved while
      # it is running dirties the status again instead of being overwritten.
      mr_status.save

      opts = {:out => {:replace => collection}, :query => query}
      self.collection.map_reduce(map, reduce, opts)
    end

    # Does this particular map reduce require an update?
    # True when no status record exists yet or the existing one is dirty.
    def requires_mr_update(collection)
      mr_status = MrStatus.first(:collection_name => collection)
      mr_status.nil? || mr_status.status
    end

    # True when +name+ is a declared key of Array type. Undeclared keys are
    # treated as scalars rather than raising.
    def mr_array_key?(name)
      declared = keys[name.to_s]
      !declared.nil? && declared.type == Array
    end
  end

  # Marks every cached map/reduce result built from this document's class as
  # dirty. Defined directly on the concern (not in a nested InstanceMethods
  # module) so it is mixed in regardless of the ActiveSupport version.
  def mr_dirty!
    MrStatus.where(:base_class => self.class.to_s).all.each do |mr_status|
      mr_status.status = true
      mr_status.save
    end
  end
end
|
121
|
+
|
data/reduceable.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path("../lib/reduceable/version", __FILE__)
|
3
|
+
|
4
|
+
# Gem packaging definition for reduceable.
Gem::Specification.new do |s|
  s.name        = "reduceable"
  s.version     = Reduceable::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Leonard Garvey']
  s.email       = ['lengarvey@gmail.com']
  s.homepage    = "http://github.com/lengarvey/reduceable"
  s.summary     = "Reduceable makes map reduce in mongo easy"
  s.description = "A MongoMapper mixin that adds cached map/reduce helpers (count_by and sum_of) to your models."

  s.required_rubygems_version = ">= 1.3.6"

  # lib/reduceable.rb requires both of these at load time, so they must be
  # installed together with the gem. ActiveSupport::Concern needs 3.0+.
  s.add_dependency "mongo_mapper"
  s.add_dependency "activesupport", ">= 3.0"

  s.add_development_dependency "bundler", ">= 1.0.0"
  s.add_development_dependency "rspec", ">= 2.3.0"

  # Ask git for the tracked files once and derive both lists from it.
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  s.executables  = tracked_files.map { |f| f[/^bin\/(.*)/, 1] }.compact
  s.require_path = 'lib'
end
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'mongo_mapper'
|
2
|
+
require './lib/reduceable'
|
3
|
+
|
4
|
+
# Shared spec helpers: selects the test database, resets it between examples,
# loads the sample sales and exposes the aggregates expected from them.
module Helpers
  MongoMapper.database = 'reduceable_dev'

  # Drops every collection in the test database. Dropping is best effort:
  # a collection that cannot be dropped is skipped. Only StandardError is
  # swallowed so that interrupts and exits still propagate.
  def clear_database
    MongoMapper.database.collections.each do |collection|
      begin
        collection.drop
      rescue StandardError
        # ignore and carry on with the remaining collections
      end
    end
  end

  # Expected count_by(:tags) result for the data created by load_data.
  def count_answers
    {'book'=>5.0, 'fiction'=>2.0, 'music'=>4.0, 'fantasy'=>2.0, 'non-fiction'=>1.0, 'rock'=>1.0, 'pop'=>1.0, 'classical'=>1.0, 'alternative'=>1.0}
  end

  # Expected sum_of(:sale_amount, :tags) result for the data created by load_data.
  def sum_answers
    {'book'=>200.0, 'fiction'=>80.0, 'music'=>82.0, 'fantasy'=>80.0, 'non-fiction'=>40.0, 'rock'=>20.5, 'pop'=>20.5, 'classical'=>20.5, 'alternative'=>20.5}
  end

  # Inserts nine sample sales: five tagged 'book', four tagged 'music'.
  def load_data
    Sale.create(:date => Date.today, :tags => ['book', 'fiction'], :sale_amount => 40)
    Sale.create(:date => Date.today, :tags => ['music', 'rock'], :sale_amount => 20.5)
    Sale.create(:date => Date.today, :tags => ['music', 'pop'], :sale_amount => 20.5)
    Sale.create(:date => Date.today, :tags => ['music', 'classical'], :sale_amount => 20.5)
    Sale.create(:date => Date.today, :tags => ['music', 'alternative'], :sale_amount => 20.5)
    Sale.create(:date => Date.today, :tags => ['book', 'fiction'], :sale_amount => 40)
    Sale.create(:date => Date.today, :tags => ['book', 'fantasy'], :sale_amount => 40)
    Sale.create(:date => Date.today, :tags => ['book', 'fantasy'], :sale_amount => 40)
    Sale.create(:date => Date.today, :tags => ['book', 'non-fiction'], :sale_amount => 40)
  end

  # Document under test: a sale with a date, an amount and a list of tags.
  class Sale
    include MongoMapper::Document
    include Reduceable

    key :date, String # or Time YYYYMMDD format
    key :sale_amount, Float
    key :tags, Array # a list of tags you might want to query on
                     # eg: you can query on 'book' to find out
                     # how many book sales you have per day
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
$: << File.dirname(__FILE__)
|
2
|
+
require 'helpers'
|
3
|
+
|
4
|
+
RSpec.configure do |c|
  c.include Helpers
end

# Exercises count_by and sum_of against the sample sales loaded by Helpers.
describe "Reduceable" do
  before(:each) do
    clear_database
    load_data
  end

  # Collapses map/reduce result rows into a {group => value} Hash so the
  # whole result can be compared at once. Comparing row by row passes
  # vacuously when the result is empty or a group is missing.
  def totals_by_id(rows)
    Hash[rows.to_a.map { |row| [row['_id'], row['value']] }]
  end

  it "should count_by" do
    Sale.respond_to?(:count_by).should eql true
  end

  it "should sum_of" do
    Sale.respond_to?(:sum_of).should eql true
  end

  it "returns a cursor when asked to count" do
    values = Sale.count_by(:tags)
    values.class.should eql(Mongo::Cursor)
  end

  it "should be able to count" do
    totals_by_id(Sale.count_by(:tags)).should == count_answers
  end

  it "should be able to add" do
    totals_by_id(Sale.sum_of(:sale_amount, :tags)).should == sum_answers
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: reduceable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Leonard Garvey
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-16 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: &2156485540 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.0.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2156485540
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &2156484980 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.3.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2156484980
|
36
|
+
description: ''
|
37
|
+
email:
|
38
|
+
- lengarvey@gmail.com
|
39
|
+
executables: []
|
40
|
+
extensions: []
|
41
|
+
extra_rdoc_files: []
|
42
|
+
files:
|
43
|
+
- .gitignore
|
44
|
+
- Gemfile
|
45
|
+
- README.markdown
|
46
|
+
- Rakefile
|
47
|
+
- example.rb
|
48
|
+
- lib/reduceable.rb
|
49
|
+
- lib/reduceable/version.rb
|
50
|
+
- reduceable.gemspec
|
51
|
+
- spec/helpers.rb
|
52
|
+
- spec/reduceable_spec.rb
|
53
|
+
homepage: http://github.com/lengarvey/reduceable
|
54
|
+
licenses: []
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 1.3.6
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.7.2
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Reduceable makes map reduce in mongo easy
|
77
|
+
test_files: []
|