mongo_mapper_parallel 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/lib/mongo_mapper_parallel.rb +105 -0
  3. metadata +95 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZjgxNGEzYTAwMmQxOWJjMzY1YTM4YjZjYzRmZjBkYzQ3MWUxMThlMw==
5
+ data.tar.gz: !binary |-
6
+ Njk4YjEyYjNmY2I5YjExODAyNzA5ZTNiNzRhNWJlYWUwNzA0N2NkYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODBjOTBhYjIzMTUyNTZjMTkyYTQ0N2IyMDA2YmI5YTVlNWQxMDZhN2VmZTVh
10
+ Y2VhOWFjZDk3NTM5NmQzYmU1MTRmYTlmYWJiY2NiMzYxNmRlZTRlYTljN2Q5
11
+ MjdjNjk2YmY1NjY0ZTViMTI3N2Y4ZmQ2NWZjMjVjYjgxYzczZjc=
12
+ data.tar.gz: !binary |-
13
+ NDg0NmY2OWJjYzk1ZjVjNzQ1NTljMzQwYjViMjRiZWMzZTJkMmJmZThjZWYy
14
+ ZWM4ZDAzYmQ1OTE4ZmQzZGRjMjA5ZGM0NjZkNTU2MTNlOWI3YjllY2EzNGI3
15
+ MzNhYTc1YTcwN2Y5NGE0YWU3ZmI2OTVhYjgyZWI1MzMzZDk1ZDg=
@@ -0,0 +1,105 @@
1
+ # @title Mongo Mapper Parallel
2
+ # @author Jonathan Raiman
3
+ require 'parallel' # for parallel processing
4
+ require 'colorize' # for colored output
5
+ require 'mongo_mapper' # for access to collections.
6
+
7
# Runs a JavaScript function over a whole Mongo collection by cutting the
# collection into key ranges (via the `splitVector` command) and evaluating
# the function once per range, with the ranges dispatched through Parallel.
class MongoMapperParallel
  # Default value sent as `maxChunkSizeBytes` to `splitVector` (32 MiB).
  DEFAULT_MAX_CHUNK_SIZE_BYTES = 32 * 1024 * 1024

  attr_reader :split_keys
  attr_accessor :command_class
  attr_accessor :javascript
  attr_accessor :args

  # One key range ("chunk") of the collection that is processed as a unit.
  class Key
    attr_accessor :future_key
    attr_accessor :key
    attr_accessor :completed
    attr_reader :compiler

    # Builds a chunk descriptor; a new chunk always starts out not completed.
    #
    # @param opts [Hash] the chunk description
    # @option opts [Object, nil] :key the lower bound of the range of resources to retrieve
    # @option opts [Object, nil] :future_key the upper bound of the range (nil when there is none)
    # @option opts [MongoMapperParallel] :compiler the parallel execution object that holds
    #   the javascript, the arguments and the collection class
    def initialize(opts = {})
      @key = opts[:key]
      @compiler = opts[:compiler]
      @future_key = opts[:future_key]
      @completed = false
    end

    # The javascript function to run on the resources, as held by the compiler.
    #
    # @return [String] the function to run
    def javascript
      @compiler.javascript
    end

    # The extra arguments handed to the javascript function, as held by the compiler.
    #
    # @return [Array, Hash] the arguments to pass to the javascript function
    def args
      @compiler.args
    end

    # The Ruby class representing the collection, as held by the compiler.
    #
    # @return [Class]
    def command_class
      @compiler.command_class
    end

    # Sends the javascript function to the database as an `$eval` command with
    # `[key, future_key, args]` as its arguments, then marks this chunk as
    # completed and prints a confirmation.
    #
    # The javascript function itself is responsible for restricting its work
    # to the range it is given; nothing here builds a query.
    #
    # NOTE(review): `$eval` was deprecated and later removed in newer MongoDB
    # server releases -- confirm the target server still supports it.
    def compile
      command_class.database.command({
        :"$eval" => javascript,
        :args    => [@key, @future_key, args],
        :nolock  => true
      })
      @completed = true
      puts "Completed chunk".green
    end
  end

  # Asks the database for split points via the `splitVector` command and turns
  # them into consecutive key ranges stored in {#split_keys}.
  #
  # Each split point becomes the lower bound of one range; the next split
  # point (or nil for the last one) is its upper bound. When the object was
  # created with `:include_leading_chunk => true`, an extra first range with a
  # nil lower bound is added so that resources sorting before the first split
  # point are also visited (and a collection with no split points still
  # yields one unbounded range).
  #
  # @return [Array<MongoMapperParallel::Key>] the list of the keys that will be used for parallel operation
  def get_split_keys
    @split_keys = []
    field = @split.to_s
    database = @command_class.database
    response = database.command({
      splitVector: "#{database.name}.#{@command_class.collection.name}",
      keyPattern: {@split.to_sym => 1},
      maxChunkSizeBytes: (@max_chunk_size_bytes || DEFAULT_MAX_CHUNK_SIZE_BYTES)
    })

    bounds = response["splitKeys"].map { |split_doc| split_doc[field] }
    # A nil lower bound stands for "from the start of the collection".
    bounds.unshift(nil) if @include_leading_chunk

    bounds.each_with_index do |lower, position|
      # bounds[position + 1] is nil past the last entry, i.e. no upper bound.
      @split_keys << MongoMapperParallel::Key.new(
        :compiler   => self,
        :key        => lower,
        :future_key => bounds[position + 1]
      )
    end
    @split_keys
  end

  # Instantiates the parallel operation object with the right class,
  # javascript function, and field, and immediately computes the key ranges.
  #
  # @param opts [Hash] the configuration
  # @option opts [Class] :class the Mongo collection's Ruby class to execute operations on (required)
  # @option opts [String] :javascript the javascript function in String format
  # @option opts [Array, Hash] :args the arguments to pass to the javascript function
  # @option opts [String, Symbol] :split the field to split the computation on -- typically
  #   an indexed unique property of the resources in the collection (required)
  # @option opts [Integer] :max_chunk_size_bytes (DEFAULT_MAX_CHUNK_SIZE_BYTES) value sent as
  #   `maxChunkSizeBytes` to `splitVector`
  # @option opts [Boolean] :include_leading_chunk (false) also create a range with a nil lower
  #   bound ending at the first split point; the javascript function must then accept a nil
  #   first argument
  # @raise [ArgumentError] when :class or :split is missing
  def initialize(opts = {})
    raise ArgumentError, "MongoMapperParallel requires a :class option" if opts[:class].nil?
    raise ArgumentError, "MongoMapperParallel requires a :split option" if opts[:split].nil?

    @command_class = opts[:class]
    @javascript = opts[:javascript]
    @args = opts[:args]
    @split = opts[:split] # name, title, etc...
    @max_chunk_size_bytes = opts.fetch(:max_chunk_size_bytes, DEFAULT_MAX_CHUNK_SIZE_BYTES)
    @include_leading_chunk = opts.fetch(:include_leading_chunk, false)
    get_split_keys
  end

  # Starts the parallel processing using {https://github.com/grosser/parallel Parallel}:
  # every chunk that is not yet marked completed is compiled, then a success
  # message is printed.
  #
  # NOTE(review): Parallel presumably runs the block in worker processes, in
  # which case `completed` set inside a worker would not be visible on the
  # Key objects held here -- confirm before relying on it to resume a run.
  def run
    Parallel.each_with_index(@split_keys) do |section, _position|
      section.compile unless section.completed
    end
    puts "Success".green
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mongo_mapper_parallel
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Raiman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mongo_mapper
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colorize
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Mongo Mapper Parallel can perform MapReduce-like operations on an entire
56
+ collection in parallel. This is a non-blocking operation, so the scripts can invoke
57
+ database methods (db.collection.update, db.collection.insert, etc...) at blazing
58
+ speed.
59
+ email: jraiman@mit.edu
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - lib/mongo_mapper_parallel.rb
65
+ homepage: http://github.org/JonathanRaiman/mongo_mapper_parallel
66
+ licenses:
67
+ - MIT
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements:
84
+ - MongoDB, 2.4+
85
+ - mongo_mapper
86
+ - colorize
87
+ - Parallel
88
+ rubyforge_project:
89
+ rubygems_version: 2.1.10
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: Extremely fast non-blocking parallel javascripts on entire MongoDB collection
93
+ with MongoMapper adapter.
94
+ test_files: []
95
+ has_rdoc: yard