mongo_mapper_parallel 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/lib/mongo_mapper_parallel.rb +105 -0
  3. metadata +95 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZjgxNGEzYTAwMmQxOWJjMzY1YTM4YjZjYzRmZjBkYzQ3MWUxMThlMw==
5
+ data.tar.gz: !binary |-
6
+ Njk4YjEyYjNmY2I5YjExODAyNzA5ZTNiNzRhNWJlYWUwNzA0N2NkYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODBjOTBhYjIzMTUyNTZjMTkyYTQ0N2IyMDA2YmI5YTVlNWQxMDZhN2VmZTVh
10
+ Y2VhOWFjZDk3NTM5NmQzYmU1MTRmYTlmYWJiY2NiMzYxNmRlZTRlYTljN2Q5
11
+ MjdjNjk2YmY1NjY0ZTViMTI3N2Y4ZmQ2NWZjMjVjYjgxYzczZjc=
12
+ data.tar.gz: !binary |-
13
+ NDg0NmY2OWJjYzk1ZjVjNzQ1NTljMzQwYjViMjRiZWMzZTJkMmJmZThjZWYy
14
+ ZWM4ZDAzYmQ1OTE4ZmQzZGRjMjA5ZGM0NjZkNTU2MTNlOWI3YjllY2EzNGI3
15
+ MzNhYTc1YTcwN2Y5NGE0YWU3ZmI2OTVhYjgyZWI1MzMzZDk1ZDg=
@@ -0,0 +1,105 @@
1
+ # @title Mongo Mapper Parallel
2
+ # @author Jonathan Raiman
3
+ require 'parallel' # for parallel processing
4
+ require 'colorize' # for colored output
5
+ require 'mongo_mapper' # for access to collections.
6
+
7
# Runs a server-side Javascript function over a Mongo collection by cutting the
# collection into key ranges (via the `splitVector` command) and evaluating the
# function once per range through {https://github.com/grosser/parallel Parallel}.
#
# @example
#   job = MongoMapperParallel.new(
#     :class      => Person,
#     :split      => :name,
#     :javascript => "function (from, to, args) { /* ... */ }",
#     :args       => {:dry_run => false}
#   )
#   job.run
class MongoMapperParallel
  # Chunk size (32 MiB) requested from `splitVector` when the caller does not supply one.
  DEFAULT_MAX_CHUNK_SIZE_BYTES = 32 * 1024 * 1024

  # @return [Array<MongoMapperParallel::Key>] the chunks computed by {#get_split_keys}
  attr_reader :split_keys
  # @return [Class] the Ruby class whose `database` and `collection` are used
  attr_accessor :command_class
  # @return [String] the Javascript function evaluated for every chunk
  attr_accessor :javascript
  # @return [Array, Hash] extra arguments forwarded to the Javascript function
  attr_accessor :args

  # One key range ("chunk") of the collection, evaluated as a single unit of work.
  class Key
    # @return [Object, nil] upper bound of the range; nil when the range is open-ended
    attr_accessor :future_key
    # @return [Object, nil] lower bound of the range
    attr_accessor :key
    # @return [Boolean] whether {#compile} has finished for this chunk
    attr_accessor :completed
    # @return [MongoMapperParallel] the owning job (holds the javascript, args and class)
    attr_reader :compiler

    # @param opts [Hash]
    # @option opts [Object] :key the lower bound of the range of resources to retrieve
    # @option opts [Object] :future_key the upper bound of the range of resources to retrieve
    # @option opts [MongoMapperParallel] :compiler the job that holds the keys,
    #   javascript, and arguments
    def initialize(opts={})
      @key = opts[:key]
      @compiler = opts[:compiler]
      @future_key = opts[:future_key]
      @completed = false
    end

    # @return [String] the Javascript function to run, as configured on the compiler
    def javascript
      @compiler.javascript
    end

    # @return [Array, Hash] the arguments configured on the compiler
    def args
      @compiler.args
    end

    # @return [Class] the Ruby class representing the collection, as configured on the compiler
    def command_class
      @compiler.command_class
    end

    # Sends the Javascript function, the range bounds and the user arguments to
    # the database as an `$eval` command, then marks the chunk as completed.
    #
    # The function is invoked with three arguments: `key`, `future_key`, `args`.
    #
    # NOTE(review): `$eval` was deprecated in MongoDB 3.0 and removed in 4.2, so
    # this only works against older servers.
    #
    # @return [nil]
    def compile
      # `$eval` must remain the first entry: the first key of a command
      # document is what names the command.
      command_class.database.command({
        :"$eval" => javascript,
        :args => [@key, @future_key, args],
        :nolock => true
      })
      @completed = true
      puts "Completed chunk".green
    end
  end

  # Asks the database for split points on the configured field (`splitVector`
  # command) and rebuilds {#split_keys} from them. Every split point becomes
  # the lower bound of one chunk, the following split point (or nil for the
  # last chunk) its upper bound.
  #
  # By default no chunk is created for documents that sort below the first
  # split point, and a response without split points yields no chunks at all.
  # Constructing the job with `:include_leading_chunk => true` prepends a chunk
  # whose lower bound is nil to cover that range.
  #
  # @return [Array<MongoMapperParallel::Key>] the list of the keys that will be
  #   used for parallel operation
  def get_split_keys
    database = @command_class.database
    namespace = "#{database.name}.#{@command_class.collection.name}"
    response = database.command({
      splitVector: namespace,
      keyPattern: {@split.to_sym => 1},
      maxChunkSizeBytes: @max_chunk_size_bytes
    })

    field = @split.to_s
    # Array() tolerates a response that carries no "splitKeys" entry.
    bounds = Array(response["splitKeys"]).map { |split_key| split_key[field] }
    bounds.unshift(nil) if @include_leading_chunk

    @split_keys = bounds.each_with_index.map do |lower, index|
      Key.new(:compiler => self, :key => lower, :future_key => bounds[index + 1])
    end
  end

  # Instantiates the parallel operation object and immediately computes the
  # chunks (this issues a `splitVector` command against the database).
  #
  # @param opts [Hash]
  # @option opts [Class] :class the Mongo collection's Ruby Class to execute operations on
  # @option opts [String] :javascript the Javascript function in String format
  # @option opts [Array, Hash] :args the arguments to pass to the Javascript function
  # @option opts [String, Symbol] :split the field to split the computation on --
  #   typically an indexed unique property of the resources in the collection
  # @option opts [Integer] :max_chunk_size_bytes (DEFAULT_MAX_CHUNK_SIZE_BYTES)
  #   chunk size passed to `splitVector`
  # @option opts [Boolean] :include_leading_chunk (false) also schedule a chunk
  #   with a nil lower bound ending at the first split point; the Javascript
  #   function must then cope with a null first argument
  def initialize(opts={})
    @command_class = opts[:class]
    @javascript = opts[:javascript]
    @args = opts[:args]
    @split = opts[:split] # name, title, etc...
    @max_chunk_size_bytes = opts[:max_chunk_size_bytes] || DEFAULT_MAX_CHUNK_SIZE_BYTES
    @include_leading_chunk = opts.fetch(:include_leading_chunk, false)
    get_split_keys
  end

  # Evaluates every chunk that is not yet marked as completed, using
  # {https://github.com/grosser/parallel Parallel}.
  #
  # NOTE(review): Parallel presumably runs the block in separate worker
  # processes by default, in which case `completed` flags set by the workers
  # would not be visible on the Key objects held here -- confirm before relying
  # on `completed` after a run.
  #
  # @return [nil]
  def run
    Parallel.each_with_index(@split_keys) do |section, _index|
      section.compile unless section.completed
    end
    puts "Success".green
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mongo_mapper_parallel
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Raiman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mongo_mapper
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colorize
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: parallel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Mongo Mapper Parallel can perform MapReduce-like operations on an entire
56
+ collection in parallel. This is a non-blocking operation, so the scripts can invoke
57
+ database methods (db.collection.update, db.collection.insert, etc...) at blazing
58
+ speed.
59
+ email: jraiman@mit.edu
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - lib/mongo_mapper_parallel.rb
65
+ homepage: http://github.org/JonathanRaiman/mongo_mapper_parallel
66
+ licenses:
67
+ - MIT
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements:
84
+ - MongoDB, 2.4+
85
+ - mongo_mapper
86
+ - colorize
87
+ - Parallel
88
+ rubyforge_project:
89
+ rubygems_version: 2.1.10
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: Extremely fast non-blocking parallel javascripts on entire MongoDB collection
93
+ with MongoMapper adapter.
94
+ test_files: []
95
+ has_rdoc: yard