mongo_mapper_parallel 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/mongo_mapper_parallel.rb +105 -0
- metadata +95 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
!binary "U0hBMQ==":
|
|
3
|
+
metadata.gz: !binary |-
|
|
4
|
+
ZjgxNGEzYTAwMmQxOWJjMzY1YTM4YjZjYzRmZjBkYzQ3MWUxMThlMw==
|
|
5
|
+
data.tar.gz: !binary |-
|
|
6
|
+
Njk4YjEyYjNmY2I5YjExODAyNzA5ZTNiNzRhNWJlYWUwNzA0N2NkYQ==
|
|
7
|
+
SHA512:
|
|
8
|
+
metadata.gz: !binary |-
|
|
9
|
+
ODBjOTBhYjIzMTUyNTZjMTkyYTQ0N2IyMDA2YmI5YTVlNWQxMDZhN2VmZTVh
|
|
10
|
+
Y2VhOWFjZDk3NTM5NmQzYmU1MTRmYTlmYWJiY2NiMzYxNmRlZTRlYTljN2Q5
|
|
11
|
+
MjdjNjk2YmY1NjY0ZTViMTI3N2Y4ZmQ2NWZjMjVjYjgxYzczZjc=
|
|
12
|
+
data.tar.gz: !binary |-
|
|
13
|
+
NDg0NmY2OWJjYzk1ZjVjNzQ1NTljMzQwYjViMjRiZWMzZTJkMmJmZThjZWYy
|
|
14
|
+
ZWM4ZDAzYmQ1OTE4ZmQzZGRjMjA5ZGM0NjZkNTU2MTNlOWI3YjllY2EzNGI3
|
|
15
|
+
MzNhYTc1YTcwN2Y5NGE0YWU3ZmI2OTVhYjgyZWI1MzMzZDk1ZDg=
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# @title Mongo Mapper Parallel
|
|
2
|
+
# @author Jonathan Raiman
|
|
3
|
+
require 'parallel' # for parallel processing
|
|
4
|
+
require 'colorize' # for colored output
|
|
5
|
+
require 'mongo_mapper' # for access to collections.
|
|
6
|
+
|
|
7
|
+
class MongoMapperParallel
  # Runs a JavaScript function over a Mongo collection in parallel.
  #
  # The collection is cut into ranges using the split points reported by the
  # `splitVector` database command; each range is wrapped in a
  # {MongoMapperParallel::Key} and handed to `Parallel` by {#run}.

  # Value sent as `maxChunkSizeBytes` to the `splitVector` command (32 MiB).
  MAX_CHUNK_SIZE_BYTES = 32 * 1024 * 1024

  attr_reader :split_keys
  attr_accessor :command_class
  attr_accessor :javascript
  attr_accessor :args

  class Key
    # A chunk that will be parallelized: one range of the split field,
    # delimited by `key` (lower bound) and `future_key` (upper bound).
    attr_accessor :future_key
    attr_accessor :key
    attr_accessor :completed
    attr_reader :compiler

    # Builds a chunk. The chunk starts out not completed.
    #
    # @param opts [Hash] the chunk description
    # @option opts [Object] :key the lower bound of the range of resources to retrieve
    # @option opts [Object, nil] :future_key the upper bound for the range of
    #   resources to retrieve; nil when there is no following split point
    # @option opts [MongoMapperParallel] :compiler the parallel execution object
    #   that holds the keys, javascript, and arguments
    #
    def initialize(opts = {})
      @key = opts[:key]
      @compiler = opts[:compiler]
      @future_key = opts[:future_key]
      @completed = false
    end

    # The javascript function to run on the resources (read from the compiler).
    #
    # @return [String] the function to run
    #
    def javascript
      @compiler.javascript
    end

    # The arguments to pass to the javascript function (read from the compiler).
    #
    # @return [Array, Hash] the arguments to pass to the javascript function
    #
    def args
      @compiler.args
    end

    # The Ruby class representing the collection containing the resources
    # (read from the compiler).
    #
    # @return [Class]
    #
    def command_class
      @compiler.command_class
    end

    # Sends the javascript function, the range bounds, and the arguments to the
    # database as an `$eval` command, then marks this chunk as completed and
    # prints a confirmation line.
    #
    # The javascript function is called with three arguments, in this order:
    # the lower bound, the upper bound (possibly nil), and the user arguments.
    #
    # @return [nil] the return value of `puts`
    #
    def compile
      # NOTE(review): "$eval" is kept as the first entry; the server presumably
      # identifies the command by the first key of the document -- confirm.
      command_class.database.command({
        :"$eval" => javascript,
        :args    => [@key, @future_key, args],
        :nolock  => true
      })
      @completed = true
      puts "Completed chunk".green
    end
  end

  # Obtains the split points of the collection via the `splitVector` command
  # and turns each of them into a chunk stored in `@split_keys`.
  #
  # Each chunk runs from one split point to the next one; the last chunk has
  # no upper bound (`future_key` is nil). A reply without a "splitKeys" entry
  # yields an empty list instead of raising.
  #
  # NOTE(review): the first chunk starts at the first reported split point, so
  # documents sorting below that point (and whole collections for which no
  # split point is reported) are presumably never visited -- confirm against
  # the `splitVector` semantics before relying on full coverage.
  #
  # @return [Array<MongoMapperParallel::Key>] the list of the keys that will be
  #   used for parallel operation
  #
  def get_split_keys
    database  = @command_class.database
    namespace = "#{database.name}.#{@command_class.collection.name}"
    reply = database.command({
      splitVector: namespace,
      keyPattern: { @split.to_sym => 1 },
      maxChunkSizeBytes: MAX_CHUNK_SIZE_BYTES
    })

    splits = reply["splitKeys"] || []
    field  = @split.to_s # split documents are read with String keys

    @split_keys = splits.each_with_index.map do |split_key, index|
      following = splits[index + 1]
      MongoMapperParallel::Key.new(
        :compiler   => self,
        :key        => split_key[field],
        :future_key => (following ? following[field] : nil)
      )
    end
  end

  # Instantiates the parallel operation object with the right class, javascript
  # function, and field, and immediately computes the chunks to process.
  #
  # @param opts [Hash] the operation description
  # @option opts [Class] :class the Mongo collection's Ruby class to execute operations on
  # @option opts [String] :javascript the javascript function in String format
  # @option opts [Array, Hash] :args the arguments to pass to the javascript function
  # @option opts [String, Symbol] :split the field to split the computation on --
  #   typically an indexed unique property of the resources in the collection
  #   (name, title, etc...)
  #
  def initialize(opts = {})
    @command_class = opts[:class]
    @javascript    = opts[:javascript]
    @args          = opts[:args]
    @split         = opts[:split]
    get_split_keys
  end

  # Starts the parallel processing using {https://github.com/grosser/parallel Parallel}.
  # Chunks already flagged as completed are skipped. Prints "Success" once
  # every chunk has been handed to `Parallel` and the call has returned.
  #
  # NOTE(review): if `Parallel` runs the block in separate worker processes,
  # the `completed` flag set inside `Key#compile` presumably does not reach the
  # objects held here -- confirm before using it to resume a partial run.
  #
  # @return [nil] the return value of `puts`
  #
  def run
    Parallel.each_with_index(@split_keys) do |section, _index|
      section.compile unless section.completed
    end
    puts "Success".green
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: mongo_mapper_parallel
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Jonathan Raiman
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: mongo_mapper
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ! '>='
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ! '>='
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: colorize
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ! '>='
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ! '>='
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: parallel
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ! '>='
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ! '>='
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
description: Mongo Mapper Parallel can perform MapReduce-like operations on an entire
|
|
56
|
+
collection in parallel. This is a non-blocking operation, so the scripts can invoke
|
|
57
|
+
database methods (db.collection.update, db.collection.insert, etc...) at blazing
|
|
58
|
+
speed.
|
|
59
|
+
email: jraiman@mit.edu
|
|
60
|
+
executables: []
|
|
61
|
+
extensions: []
|
|
62
|
+
extra_rdoc_files: []
|
|
63
|
+
files:
|
|
64
|
+
- lib/mongo_mapper_parallel.rb
|
|
65
|
+
homepage: http://github.org/JonathanRaiman/mongo_mapper_parallel
|
|
66
|
+
licenses:
|
|
67
|
+
- MIT
|
|
68
|
+
metadata: {}
|
|
69
|
+
post_install_message:
|
|
70
|
+
rdoc_options: []
|
|
71
|
+
require_paths:
|
|
72
|
+
- lib
|
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
74
|
+
requirements:
|
|
75
|
+
- - ! '>='
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ! '>='
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
requirements:
|
|
84
|
+
- MongoDB, 2.4+
|
|
85
|
+
- mongo_mapper
|
|
86
|
+
- colorize
|
|
87
|
+
- Parallel
|
|
88
|
+
rubyforge_project:
|
|
89
|
+
rubygems_version: 2.1.10
|
|
90
|
+
signing_key:
|
|
91
|
+
specification_version: 4
|
|
92
|
+
summary: Extremely fast non-blocking parallel javascripts on entire MongoDB collection
|
|
93
|
+
with MongoMapper adapter.
|
|
94
|
+
test_files: []
|
|
95
|
+
has_rdoc: yard
|